1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #include <rte_eventdev.h>
/* Compile-time Tx offload flags. Each bit selects one offload feature;
 * combinations of these bits are used to instantiate specialized
 * xmit function variants (templated fast paths).
 */
11 #define NIX_TX_OFFLOAD_NONE (0)
12 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
13 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
14 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
15 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
16 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
17 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
18 #define NIX_TX_OFFLOAD_SECURITY_F BIT(6)
20 /* Flags to control xmit_prepare function.
21 * Defining it from backwards to denote its been
22 * not used as offload flags to pick function
/* NOTE(review): the terminator of the comment above is not visible in this
 * extract (interior lines elided). These two high bits are control flags,
 * deliberately placed at the top of the bit space so they never collide
 * with the offload-capability bits (BIT(0)..BIT(6)) above.
 */
24 #define NIX_TX_VWQE_F BIT(14)
25 #define NIX_TX_MULTI_SEG_F BIT(15)
/* Offload combinations that require populating send header word 1
 * (checksum/L3/L4 pointer fields).
 */
27 #define NIX_TX_NEED_SEND_HDR_W1 \
28 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
29 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
/* Offloads that require the NIX send extension subdescriptor.
 * NOTE(review): the final operand of this macro is elided in this extract
 * (original line numbering skips) -- presumably NIX_TX_OFFLOAD_TSO_F;
 * confirm against the full file.
 */
31 #define NIX_TX_NEED_EXT_HDR \
32 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
/* Flow-control check: refresh the cached free-descriptor count from the
 * hardware FC memory when it is lower than the requested packet count,
 * and bail out of the caller if there is still no room.
 * NOTE(review): the do{...}while(0) wrapper and the early-return statement
 * of this macro are elided in this extract; only the refresh logic is
 * visible. Code left untouched.
 */
35 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
37 /* Cached value is low, Update the fc_cache_pkts */ \
38 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
39 /* Multiply with sqe_per_sqb to express in pkts */ \
40 (txq)->fc_cache_pkts = \
41 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
42 << (txq)->sqes_per_sqb_log2; \
43 /* Check it again for the room */ \
44 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
49 /* Encoded number of segments to number of dwords macro, each value of nb_segs
50 * is encoded as 4bits.
/* NOTE(review): comment terminator elided in this extract.
 * Lookup is done by shifting the magic constant right by (nb_segs * 4)
 * and masking the low nibble -- a branch-free nb_segs -> dword-count map.
 */
52 #define NIX_SEGDW_MAGIC 0x76654432210ULL
54 #define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
56 /* Function to determine no of tx subdesc required in case ext
57 * sub desc is enabled.
/* NOTE(review): returns the count of extension subdescriptors implied by
 * `flags`. The result expressions of both ternaries are elided in this
 * extract -- presumably 2 for TSTAMP (EXT + MEM), 1 for VLAN/TSO (EXT only)
 * and 0 otherwise; confirm against the full file.
 */
59 static __rte_always_inline int
60 cn10k_nix_tx_ext_subs(const uint16_t flags)
62 return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
65 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
/* Total dwords occupied by one packet's NIX send descriptor.
 * Single-seg: header (2 dwords) + any extension subdescriptors.
 * Multi-seg: `segdw` already carries the full count (return elided here).
 */
70 static __rte_always_inline uint8_t
71 cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
73 if (!(flags & NIX_TX_MULTI_SEG_F))
74 return cn10k_nix_tx_ext_subs(flags) + 2;
76 /* Already everything is accounted for in segdw */
/* Max packets per vector burst: 2 packets per LMT line when an extension
 * header is needed (3 dwords each), else 4, scaled by the number of LMT
 * lines available per core.
 */
80 static __rte_always_inline uint8_t
81 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
83 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
84 << ROC_LMT_LINES_PER_CORE_LOG2;
/* Dwords consumed per LMT line in the vector (single-seg) path:
 * 8 with timestamp, 6 with only the extension header; the no-ext value
 * is elided in this extract (presumably 4 -- confirm in full file).
 */
87 static __rte_always_inline uint8_t
88 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
90 return (flags & NIX_TX_NEED_EXT_HDR) ?
91 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
/* Build the STEOR (store-and-release) data word for a scalar-path LMTST
 * submission: per-line size fields derived from dwords-minus-1.
 * NOTE(review): the body that packs the 15 size fields is elided in this
 * extract; code untouched.
 */
95 static __rte_always_inline uint64_t
96 cn10k_nix_tx_steor_data(const uint16_t flags)
98 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
101 /* This will be moved to addr area */
103 /* 15 vector sizes for single seg */
/* Dwords per LMT line for the multi-seg vector path; the no-ext-header
 * value is elided in this extract (confirm against full file).
 */
123 static __rte_always_inline uint8_t
124 cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
126 return ((flags & NIX_TX_NEED_EXT_HDR) ?
127 (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
/* Build the STEOR data word for the vector path; size fields come from
 * the per-line dword count. Body packing the 15 size fields is elided
 * in this extract; code untouched.
 */
131 static __rte_always_inline uint64_t
132 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
134 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
137 /* This will be moved to addr area */
139 /* 15 vector sizes for single seg */
/* STEOR data word for CPT (crypto) LMTST submissions; every line holds
 * two CPT instructions, hence the fixed dwords-minus-1 constant.
 * Field-packing body elided in this extract.
 */
159 static __rte_always_inline uint64_t
160 cn10k_cpt_tx_steor_data(void)
162 /* We have two CPT instructions per LMTLine */
163 const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
166 /* This will be moved to addr area */
/* Copy the per-queue cached command skeleton (send header word0 and,
 * when needed, the extension header template) into the local `cmd`
 * buffer for this burst. Tail of the body elided in this extract.
 */
187 static __rte_always_inline void
188 cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
189 const uint16_t flags)
192 cmd[0] = txq->send_hdr_w0;
196 /* Send ext if present */
197 if (flags & NIX_TX_NEED_EXT_HDR) {
198 *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
/* Submit any pending CPT (inline-IPsec) instructions accumulated in the
 * CPT LMT lines via a single STEORL. `lnum`/`loff` describe how many
 * lines / half-lines were filled; `shft` indexes the size field of the
 * last (possibly partial) line. Guard conditions are elided in this
 * extract; code untouched.
 */
207 static __rte_always_inline void
208 cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
209 uint8_t loff, uint8_t shft)
214 /* Check if there is any CPT instruction to submit */
218 data = cn10k_cpt_tx_steor_data();
219 /* Update lmtline use for partial end line */
221 data &= ~(0x7ULL << shft);
222 /* Update it to half full i.e 64B */
223 data |= (0x3UL << shft);
/* Fold the first line's size into the I/O address, then record the
 * number of valid lines minus one in bits 14:12.
 */
226 pa = io_addr | ((data >> 16) & 0x7) << 4;
227 data &= ~(0x7ULL << 16);
228 /* Update lines - 1 that contain valid data */
229 data |= ((uint64_t)(lnum + loff - 1)) << 12;
233 roc_lmt_submit_steorl(data, pa);
236 #if defined(RTE_ARCH_ARM64)
/* Vector-path inline-IPsec outbound prepare: turn one mbuf's NIX send
 * descriptor (cmd0/cmd1 NEON lanes) into a CPT instruction written to a
 * CPT LMT line, and return (via *nixtx_addr) the scratch area where the
 * NIX descriptor must be placed for CPT to submit post-encryption.
 * Several interior statements (l2_len default, ucode_cmd[0..2] setup,
 * loff advance) are elided in this extract; code untouched.
 */
237 static __rte_always_inline void
238 cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
239 uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
240 uint8_t *loff, uint8_t *shft, uint64_t sa_base,
241 const uint16_t flags)
243 struct cn10k_sec_sess_priv sess_priv;
244 uint32_t pkt_len, dlen_adj, rlen;
245 uint64x2_t cmd01, cmd23;
246 uintptr_t dptr, nixtx;
247 uint64_t ucode_cmd[4];
/* Session private data lives in the security dynfield of the mbuf. */
253 sess_priv.u64 = *rte_security_dynfield(m);
255 if (flags & NIX_TX_NEED_SEND_HDR_W1)
256 l2_len = vgetq_lane_u8(*cmd0, 8);
261 dptr = vgetq_lane_u64(*cmd1, 1);
262 pkt_len = vgetq_lane_u16(*cmd0, 0);
264 /* Calculate dlen adj */
265 dlen_adj = pkt_len - l2_len;
/* Round the ESP payload up to the SA's block size and add the fixed
 * per-packet overhead (IV/trailer) from partial_len.
 */
266 rlen = (dlen_adj + sess_priv.roundup_len) +
267 (sess_priv.roundup_byte - 1);
268 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
269 rlen += sess_priv.partial_len;
270 dlen_adj = rlen - dlen_adj;
272 /* Update send descriptors. Security is single segment only */
273 *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0)
274 *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
276 /* Get area where NIX descriptor needs to be stored */
277 nixtx = dptr + pkt_len + dlen_adj;
279 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
281 /* Return nixtx addr */
282 *nixtx_addr = (nixtx + 16);
284 /* DLEN passed is excluding L2HDR */
/* Tag (low 16 bits of sa_base) carries the event tag; the rest is the
 * outbound SA table base.
 */
286 tag = sa_base & 0xFFFFUL;
287 sa_base &= ~0xFFFFUL;
288 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
289 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
291 (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);
293 /* CPT Word 0 and Word 1 */
294 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
295 /* CPT_RES_S is 16B above NIXTX */
296 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
298 /* CPT word 2 and 3 */
299 cmd23 = vdupq_n_u64(0);
300 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
301 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
302 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
/* For tunnel mode, pre-write the outer ethertype just before dptr so
 * the encapsulated frame carries the right L3 protocol.
 */
306 if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
307 if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
308 *((uint16_t *)(dptr - 2)) =
309 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
311 *((uint16_t *)(dptr - 2)) =
312 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
318 /* Move to our line */
319 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
321 /* Write CPT instruction to lmt line */
322 vst1q_u64(laddr, cmd01);
323 vst1q_u64((laddr + 2), cmd23);
325 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
326 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
328 /* Move to next line for every other CPT inst */
330 *lnum = *lnum + (*loff ? 0 : 1);
331 *shft = *shft + (*loff ? 0 : 3);
/* Scalar-path twin of cn10k_nix_prep_sec_vec: build the outbound inline
 * IPsec CPT instruction from the `cmd` descriptor words instead of NEON
 * lanes. Same elisions as the vector variant apply in this extract;
 * code untouched.
 */
334 static __rte_always_inline void
335 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
336 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
337 uint64_t sa_base, const uint16_t flags)
339 struct cn10k_sec_sess_priv sess_priv;
340 uint32_t pkt_len, dlen_adj, rlen;
341 struct nix_send_hdr_s *send_hdr;
342 uint64x2_t cmd01, cmd23;
343 union nix_send_sg_s *sg;
344 uintptr_t dptr, nixtx;
345 uint64_t ucode_cmd[4];
351 /* Move to our line from base */
352 sess_priv.u64 = *rte_security_dynfield(m);
353 send_hdr = (struct nix_send_hdr_s *)cmd;
/* SG subdescriptor follows the (optional) extension header. */
354 if (flags & NIX_TX_NEED_EXT_HDR)
355 sg = (union nix_send_sg_s *)&cmd[4];
357 sg = (union nix_send_sg_s *)&cmd[2];
359 if (flags & NIX_TX_NEED_SEND_HDR_W1)
360 l2_len = cmd[1] & 0xFF;
365 dptr = *(uint64_t *)(sg + 1);
366 pkt_len = send_hdr->w0.total;
368 /* Calculate dlen adj */
369 dlen_adj = pkt_len - l2_len;
370 rlen = (dlen_adj + sess_priv.roundup_len) +
371 (sess_priv.roundup_byte - 1);
372 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
373 rlen += sess_priv.partial_len;
374 dlen_adj = rlen - dlen_adj;
376 /* Update send descriptors. Security is single segment only */
377 send_hdr->w0.total = pkt_len + dlen_adj;
378 sg->seg1_size = pkt_len + dlen_adj;
380 /* Get area where NIX descriptor needs to be stored */
381 nixtx = dptr + pkt_len + dlen_adj;
383 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
385 /* Return nixtx addr */
386 *nixtx_addr = (nixtx + 16);
388 /* DLEN passed is excluding L2HDR */
390 tag = sa_base & 0xFFFFUL;
391 sa_base &= ~0xFFFFUL;
392 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
393 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
395 (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);
397 /* CPT Word 0 and Word 1. Assume no multi-seg support */
398 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
399 /* CPT_RES_S is 16B above NIXTX */
400 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
402 /* CPT word 2 and 3 */
403 cmd23 = vdupq_n_u64(0);
404 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
405 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
406 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
/* Tunnel mode: stamp the outer ethertype 2B before dptr. */
410 if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
411 if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
412 *((uint16_t *)(dptr - 2)) =
413 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
415 *((uint16_t *)(dptr - 2)) =
416 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
421 /* Move to our line */
422 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
424 /* Write CPT instruction to lmt line */
425 vst1q_u64(laddr, cmd01);
426 vst1q_u64((laddr + 2), cmd23);
428 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
429 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
431 /* Move to next line for every other CPT inst */
433 *lnum = *lnum + (*loff ? 0 : 1);
434 *shft = *shft + (*loff ? 0 : 3);
/* Non-ARM64 stub of cn10k_nix_prep_sec: all parameters marked unused so
 * the templated callers compile on other architectures (inline IPsec Tx
 * is ARM64-only). Remaining RTE_SET_USED lines are elided in this
 * extract.
 */
439 static __rte_always_inline void
440 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
441 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
442 uint64_t sa_base, const uint16_t flags)
446 RTE_SET_USED(nixtx_addr);
451 RTE_SET_USED(sa_base);
/* TSO header fixup: for RTE_MBUF_F_TX_TCP_SEG packets, subtract the
 * segmented payload length from the inner IP length field and, for
 * tunnels, from the outer IP (and outer UDP) length fields, so the
 * hardware-generated per-segment headers come out correct.
 */
456 static __rte_always_inline void
457 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
459 uint64_t mask, ol_flags = m->ol_flags;
461 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
462 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
463 uint16_t *iplen, *oiplen, *oudplen;
464 uint16_t lso_sb, paylen;
/* mask is all-ones only when outer IP flags are present, so the
 * outer header lengths contribute only for tunneled packets.
 */
466 mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
467 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
468 m->l2_len + m->l3_len + m->l4_len;
470 /* Reduce payload len from base headers */
471 paylen = m->pkt_len - lso_sb;
473 /* Get iplen position assuming no tunnel hdr */
/* (2 << is_v6) == offset 2 (IPv4 total-length) or 4 (IPv6 payload-length). */
474 iplen = (uint16_t *)(mdata + m->l2_len +
475 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
476 /* Handle tunnel tso */
477 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
478 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
479 const uint8_t is_udp_tun =
480 (CNXK_NIX_UDP_TUN_BITMASK >>
481 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
484 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
486 RTE_MBUF_F_TX_OUTER_IPV6)));
487 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
490 /* Update format for UDP tunneled packet */
492 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
493 m->outer_l3_len + 4);
494 *oudplen = rte_cpu_to_be_16(
495 rte_be_to_cpu_16(*oudplen) - paylen);
498 /* Update iplen position to inner ip hdr */
499 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
501 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
504 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
/* Build the full NIX send descriptor for one packet into `cmd`:
 * send header (total len, aura), word1 checksum fields (inner and/or
 * outer L3/L4), VLAN/QinQ insertion, TSO extension fields, and the
 * single-segment SG entry. Sets *sec when the packet requires inline
 * IPsec processing. Interior lines are elided at several points in this
 * extract (original numbering skips); code untouched.
 */
508 static __rte_always_inline void
509 cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
510 const uint64_t lso_tun_fmt, bool *sec)
512 struct nix_send_ext_s *send_hdr_ext;
513 struct nix_send_hdr_s *send_hdr;
514 uint64_t ol_flags = 0, mask;
515 union nix_send_hdr_w1_u w1;
516 union nix_send_sg_s *sg;
518 send_hdr = (struct nix_send_hdr_s *)cmd;
519 if (flags & NIX_TX_NEED_EXT_HDR) {
520 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
521 sg = (union nix_send_sg_s *)(cmd + 4);
522 /* Clear previous markings */
523 send_hdr_ext->w0.lso = 0;
524 send_hdr_ext->w1.u = 0;
526 sg = (union nix_send_sg_s *)(cmd + 2);
529 if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
530 ol_flags = m->ol_flags;
/* Single-seg uses data_len; multi-seg uses the full pkt_len. */
534 if (!(flags & NIX_TX_MULTI_SEG_F))
535 send_hdr->w0.total = m->data_len;
537 send_hdr->w0.total = m->pkt_len;
538 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
542 * 3 => IPV4 with csum
544 * L3type and L3ptr needs to be set for either
545 * L3 csum or L4 csum or LSO
/* Case 1: both inner and outer checksum offloads compiled in. */
549 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
550 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
551 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
552 const uint8_t ol3type =
553 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
554 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
555 !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
558 w1.ol3type = ol3type;
/* mask zeroes the outer pointers when no outer L3 type is set. */
559 mask = 0xffffull << ((!!ol3type) << 4);
560 w1.ol3ptr = ~mask & m->outer_l2_len;
561 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
564 w1.ol4type = csum + (csum << 1);
567 w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
568 ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
569 w1.il3ptr = w1.ol4ptr + m->l2_len;
570 w1.il4ptr = w1.il3ptr + m->l3_len;
571 /* Increment it by 1 if it is IPV4 as 3 is with csum */
572 w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
575 w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
577 /* In case of no tunnel header use only
578 * shift IL3/IL4 fields a bit to use
579 * OL3/OL4 for header checksum
582 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
583 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
/* Case 2: only outer checksum offload compiled in. */
585 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
586 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
587 const uint8_t outer_l2_len = m->outer_l2_len;
590 w1.ol3ptr = outer_l2_len;
591 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
592 /* Increment it by 1 if it is IPV4 as 3 is with csum */
593 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
594 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
595 !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
598 w1.ol4type = csum + (csum << 1);
/* Case 3: only inner checksum offload -- inner fields are expressed
 * through the OL pointers/types since there is one header level.
 */
600 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
601 const uint8_t l2_len = m->l2_len;
603 /* Always use OLXPTR and OLXTYPE when only
604 * when one header is present
609 w1.ol4ptr = l2_len + m->l3_len;
610 /* Increment it by 1 if it is IPV4 as 3 is with csum */
611 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
612 ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
613 !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
616 w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
619 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
620 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
621 /* HW will update ptr after vlan0 update */
622 send_hdr_ext->w1.vlan1_ins_ptr = 12;
623 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
625 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
626 /* 2B before end of l2 header */
627 send_hdr_ext->w1.vlan0_ins_ptr = 12;
628 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
631 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
/* lso_sb = start of payload: outer L4 end for plain, inner L4 end
 * for tunneled packets.
 */
635 mask = -(!w1.il3type);
636 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
638 send_hdr_ext->w0.lso_sb = lso_sb;
639 send_hdr_ext->w0.lso = 1;
640 send_hdr_ext->w0.lso_mps = m->tso_segsz;
641 send_hdr_ext->w0.lso_format =
642 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
643 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
645 /* Handle tunnel tso */
646 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
647 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
648 const uint8_t is_udp_tun =
649 (CNXK_NIX_UDP_TUN_BITMASK >>
650 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
652 uint8_t shift = is_udp_tun ? 32 : 0;
654 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
655 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
657 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
658 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
659 /* Update format for UDP tunneled packet */
660 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
664 if (flags & NIX_TX_NEED_SEND_HDR_W1)
665 send_hdr->w1.u = w1.u;
667 if (!(flags & NIX_TX_MULTI_SEG_F)) {
668 sg->seg1_size = send_hdr->w0.total;
669 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
671 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
672 /* DF bit = 1 if refcount of current mbuf or parent mbuf
674 * DF bit = 0 otherwise
676 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
678 /* Mark mempool object as "put" since it is freed by NIX */
679 if (!send_hdr->w0.df)
680 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
682 sg->seg1_size = m->data_len;
683 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
685 /* NOFF is handled later for multi-seg */
688 if (flags & NIX_TX_OFFLOAD_SECURITY_F)
689 *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
/* Copy the prepared descriptor from the local `cmd` buffer to the LMT
 * line (or NIXTX area for security packets): send header, optional
 * extension header, SG subdescriptor and first IOVA. The lmt_addr
 * advance between the copies is elided in this extract.
 */
692 static __rte_always_inline void
693 cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
694 const uint16_t flags)
696 struct nix_send_ext_s *send_hdr_ext;
697 union nix_send_sg_s *sg;
699 /* With minimal offloads, 'cmd' being local could be optimized out to
700 * registers. In other cases, 'cmd' will be in stack. Intent is
701 * 'cmd' stores content from txq->cmd which is copied only once.
703 *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
705 if (flags & NIX_TX_NEED_EXT_HDR) {
706 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
707 *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
710 sg = (union nix_send_sg_s *)(cmd + 4);
712 sg = (union nix_send_sg_s *)(cmd + 2);
714 /* In case of multi-seg, sg template is stored here */
715 *((union nix_send_sg_s *)lmt_addr) = *sg;
716 *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
/* Append/patch the SEND_MEM subdescriptor used for PTP Tx timestamping.
 * When RTE_MBUF_F_TX_IEEE1588_TMST is not set, the alg and address are
 * shifted so the real timestamp slot is not overwritten. Several interior
 * lines (multi-seg default-desc retrieval, sizem1 update) are elided in
 * this extract; code untouched.
 */
719 static __rte_always_inline void
720 cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
721 const uint64_t ol_flags, const uint16_t no_segdw,
722 const uint16_t flags)
724 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
725 const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
726 struct nix_send_ext_s *send_hdr_ext =
727 (struct nix_send_ext_s *)lmt_addr + 16;
728 uint64_t *lmt = (uint64_t *)lmt_addr;
729 uint16_t off = (no_segdw - 1) << 1;
730 struct nix_send_mem_s *send_mem;
732 send_mem = (struct nix_send_mem_s *)(lmt + off);
733 send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
734 send_hdr_ext->w0.tstmp = 1;
735 if (flags & NIX_TX_MULTI_SEG_F) {
736 /* Retrieving the default desc values */
739 /* Using compiler barier to avoid voilation of C
742 rte_compiler_barrier();
745 /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
746 * should not be recorded, hence changing the alg type to
747 * NIX_SENDMEMALG_SET and also changing send mem addr field to
748 * next 8 bytes as it corrpt the actual tx tstamp registered
751 send_mem->w0.subdc = NIX_SUBDC_MEM;
752 send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
754 (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
/* Build the scatter-gather list for a multi-segment packet directly in
 * `cmd` and return the descriptor size in dwords (segdw). Each SG
 * subdescriptor covers up to 3 segments; a fresh SG header is emitted
 * every 3 segments. Several interior lines (the walk loop header, i/off
 * initialization, final sg->u store) are elided in this extract;
 * code untouched.
 */
758 static __rte_always_inline uint16_t
759 cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
761 struct nix_send_hdr_s *send_hdr;
762 union nix_send_sg_s *sg;
763 struct rte_mbuf *m_next;
764 uint64_t *slist, sg_u;
769 send_hdr = (struct nix_send_hdr_s *)cmd;
771 if (flags & NIX_TX_NEED_EXT_HDR)
776 sg = (union nix_send_sg_s *)&cmd[2 + off];
778 /* Start from second segment, first segment is already there */
781 nb_segs = m->nb_segs - 1;
783 slist = &cmd[3 + off + 1];
785 /* Set invert df if buffer is not to be freed by H/W */
786 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
787 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
789 /* Mark mempool object as "put" since it is freed by NIX */
790 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
791 if (!(sg_u & (1ULL << 55)))
792 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
799 /* Fill mbuf segments */
802 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
803 *slist = rte_mbuf_data_iova(m);
804 /* Set invert df if buffer is not to be freed by H/W */
805 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
806 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
807 /* Mark mempool object as "put" since it is freed by NIX
809 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
810 if (!(sg_u & (1ULL << (i + 55))))
811 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
/* Every 3 segments, flush the current SG header and start a new one. */
816 if (i > 2 && nb_segs) {
818 /* Next SG subdesc */
819 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
822 sg = (union nix_send_sg_s *)slist;
832 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
833 /* Roundup extra dwords to multiple of 2 */
834 segdw = (segdw >> 1) + (segdw & 0x1);
/* Account for send header, ext header and timestamp subdescriptor. */
836 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
837 send_hdr->w0.sizem1 = segdw - 1;
/* Scalar single-segment burst Tx: flow-control check, per-packet
 * descriptor prepare into per-core LMT lines (up to 32 packets per
 * inner burst), optional inline-IPsec diversion to CPT lines, then one
 * or two STEORL submissions depending on burst size. Multiple interior
 * lines (lnum/loff init, burst>16 branch structure, trailing loop
 * control) are elided in this extract; code untouched.
 */
842 static __rte_always_inline uint16_t
843 cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
844 uint64_t *cmd, uintptr_t base, const uint16_t flags)
846 struct cn10k_eth_txq *txq = tx_queue;
847 const rte_iova_t io_addr = txq->io_addr;
848 uint8_t lnum, c_lnum, c_shft, c_loff;
849 uintptr_t pa, lbase = txq->lmt_base;
850 uint16_t lmt_id, burst, left, i;
851 uintptr_t c_lbase = lbase;
852 rte_iova_t c_io_addr;
853 uint64_t lso_tun_fmt;
/* VWQE (event vector) path skips SQ flow control -- handled upstream. */
860 if (!(flags & NIX_TX_VWQE_F)) {
861 NIX_XMIT_FC_OR_RETURN(txq, pkts);
862 /* Reduce the cached count */
863 txq->fc_cache_pkts -= pkts;
866 /* Get cmd skeleton */
867 cn10k_nix_tx_skeleton(txq, cmd, flags);
869 if (flags & NIX_TX_OFFLOAD_TSO_F)
870 lso_tun_fmt = txq->lso_tun_fmt;
872 /* Get LMT base address and LMT ID as lcore id */
873 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
874 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
875 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
876 c_io_addr = txq->cpt_io_addr;
877 sa_base = txq->sa_base;
882 burst = left > 32 ? 32 : left;
885 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
891 for (i = 0; i < burst; i++) {
892 /* Perform header writes for TSO, barrier at
893 * lmt steorl will suffice.
895 if (flags & NIX_TX_OFFLOAD_TSO_F)
896 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
898 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
901 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
903 /* Prepare CPT instruction and get nixtx addr */
904 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
905 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
906 &c_lnum, &c_loff, &c_shft, sa_base,
909 /* Move NIX desc to LMT/NIXTX area */
910 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
911 cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
912 tx_pkts[i]->ol_flags, 4, flags);
913 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
/* Keep the SSO head waiting until descriptors are ready (VWQE). */
917 if (flags & NIX_TX_VWQE_F)
918 roc_sso_hws_head_wait(base);
923 /* Submit CPT instructions if any */
924 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
925 /* Reduce pkts to be sent to CPT */
926 burst -= ((c_lnum << 1) + c_loff);
927 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
/* Bursts > 16 packets are split into two STEORL submissions:
 * the first covers 16 LMT lines, the second the remainder.
 */
933 data = cn10k_nix_tx_steor_data(flags);
934 pa = io_addr | (data & 0x7) << 4;
936 data |= (15ULL << 12);
937 data |= (uint64_t)lmt_id;
940 roc_lmt_submit_steorl(data, pa);
942 data = cn10k_nix_tx_steor_data(flags);
943 pa = io_addr | (data & 0x7) << 4;
945 data |= ((uint64_t)(burst - 17)) << 12;
946 data |= (uint64_t)(lmt_id + 16);
949 roc_lmt_submit_steorl(data, pa);
951 data = cn10k_nix_tx_steor_data(flags);
952 pa = io_addr | (data & 0x7) << 4;
954 data |= ((uint64_t)(burst - 1)) << 12;
958 roc_lmt_submit_steorl(data, pa);
/* Multi-segment burst Tx: like cn10k_nix_xmit_pkts but each packet's SG
 * list is built directly on the LMT line and per-packet dword counts
 * are accumulated into a 128-bit size descriptor (data128) for the
 * STEORL submissions. Interior lines (lnum/shft init, data128 setup,
 * burst>16 branch structure) are elided in this extract; code untouched.
 */
968 static __rte_always_inline uint16_t
969 cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
970 uint16_t pkts, uint64_t *cmd, uintptr_t base,
971 const uint16_t flags)
973 struct cn10k_eth_txq *txq = tx_queue;
974 uintptr_t pa0, pa1, lbase = txq->lmt_base;
975 const rte_iova_t io_addr = txq->io_addr;
976 uint16_t segdw, lmt_id, burst, left, i;
977 uint8_t lnum, c_lnum, c_loff;
978 uintptr_t c_lbase = lbase;
979 uint64_t data0, data1;
980 rte_iova_t c_io_addr;
981 uint64_t lso_tun_fmt;
982 uint8_t shft, c_shft;
989 NIX_XMIT_FC_OR_RETURN(txq, pkts);
991 cn10k_nix_tx_skeleton(txq, cmd, flags);
993 /* Reduce the cached count */
994 txq->fc_cache_pkts -= pkts;
996 if (flags & NIX_TX_OFFLOAD_TSO_F)
997 lso_tun_fmt = txq->lso_tun_fmt;
999 /* Get LMT base address and LMT ID as lcore id */
1000 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
1001 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1002 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
1003 c_io_addr = txq->cpt_io_addr;
1004 sa_base = txq->sa_base;
1009 burst = left > 32 ? 32 : left;
1014 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1020 for (i = 0; i < burst; i++) {
1021 /* Perform header writes for TSO, barrier at
1022 * lmt steorl will suffice.
1024 if (flags & NIX_TX_OFFLOAD_TSO_F)
1025 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1027 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
1030 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
1032 /* Prepare CPT instruction and get nixtx addr */
1033 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
1034 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
1035 &c_lnum, &c_loff, &c_shft, sa_base,
1038 /* Move NIX desc to LMT/NIXTX area */
1039 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
1041 /* Store sg list directly on lmt line */
1042 segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
1044 cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
1045 tx_pkts[i]->ol_flags, segdw,
1047 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
1049 data128 |= (((__uint128_t)(segdw - 1)) << shft);
1054 if (flags & NIX_TX_VWQE_F)
1055 roc_sso_hws_head_wait(base);
1060 /* Submit CPT instructions if any */
1061 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1062 /* Reduce pkts to be sent to CPT */
1063 burst -= ((c_lnum << 1) + c_loff);
1064 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
/* Split the accumulated per-line sizes: low 64 bits for the first
 * 16 lines, high 64 bits for the rest.
 */
1068 data0 = (uint64_t)data128;
1069 data1 = (uint64_t)(data128 >> 64);
1070 /* Make data0 similar to data1 */
1074 pa0 = io_addr | (data0 & 0x7) << 4;
1076 /* Move lmtst1..15 sz to bits 63:19 */
1078 data0 |= (15ULL << 12);
1079 data0 |= (uint64_t)lmt_id;
1082 roc_lmt_submit_steorl(data0, pa0);
1084 pa1 = io_addr | (data1 & 0x7) << 4;
1087 data1 |= ((uint64_t)(burst - 17)) << 12;
1088 data1 |= (uint64_t)(lmt_id + 16);
1091 roc_lmt_submit_steorl(data1, pa1);
1093 pa0 = io_addr | (data0 & 0x7) << 4;
1095 /* Move lmtst1..15 sz to bits 63:19 */
1097 data0 |= ((burst - 1) << 12);
1098 data0 |= (uint64_t)lmt_id;
1101 roc_lmt_submit_steorl(data0, pa0);
1111 #if defined(RTE_ARCH_ARM64)
/* Vector-path TSO prepare: populate LSO fields of the extension header
 * word0 and checksum types of send header word1 for a TCP_SEG packet.
 * Early-exit and lso_sb validity-check lines are elided in this extract;
 * code untouched.
 */
1113 static __rte_always_inline void
1114 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
1115 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
1116 const uint64_t flags, const uint64_t lso_tun_fmt)
1121 if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
/* Payload start: outer L4 end for plain, inner L4 end for tunnels. */
1124 mask = -(!w1->il3type);
1125 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
1128 w0->lso_sb = lso_sb;
1129 w0->lso_mps = m->tso_segsz;
1130 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
1131 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
1133 /* Handle tunnel tso */
1134 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
1135 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
1136 const uint8_t is_udp_tun =
1137 (CNXK_NIX_UDP_TUN_BITMASK >>
1138 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
1140 uint8_t shift = is_udp_tun ? 32 : 0;
1142 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
1143 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
1145 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
1146 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
1147 /* Update format for UDP tunneled packet */
1149 w0->lso_format = (lso_tun_fmt >> shift);
/* Vector-path multi-seg SG list builder: walk the mbuf chain filling
 * `cmd` with per-segment sizes/IOVAs, updating the copies of send
 * header w0 and the first SG word that the caller holds. Loop headers
 * and the tail store are elided in this extract; code untouched.
 */
1153 static __rte_always_inline void
1154 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
1155 union nix_send_hdr_w0_u *sh,
1156 union nix_send_sg_s *sg, const uint32_t flags)
1158 struct rte_mbuf *m_next;
1159 uint64_t *slist, sg_u;
1163 sh->total = m->pkt_len;
1164 /* Clear sg->u header before use */
1165 sg->u &= 0xFC00000000000000;
1169 sg_u = sg_u | ((uint64_t)m->data_len);
1171 nb_segs = m->nb_segs - 1;
1174 /* Set invert df if buffer is not to be freed by H/W */
1175 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1176 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
1177 /* Mark mempool object as "put" since it is freed by NIX */
1178 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1179 if (!(sg_u & (1ULL << 55)))
1180 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1185 /* Fill mbuf segments */
1188 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
1189 *slist = rte_mbuf_data_iova(m);
1190 /* Set invert df if buffer is not to be freed by H/W */
1191 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1192 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
1193 /* Mark mempool object as "put" since it is freed by NIX
1195 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1196 if (!(sg_u & (1ULL << (i + 55))))
1197 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
/* Start a new SG subdescriptor after every 3 segments. */
1203 if (i > 2 && nb_segs) {
1205 /* Next SG subdesc */
1206 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
1209 sg = (union nix_send_sg_s *)slist;
/* Per-mbuf multi-seg wrapper for the vector path. Single-seg packets
 * only need the prefree/DF handling on the SG word inside cmd1;
 * multi-seg packets delegate to cn10k_nix_prepare_mseg_vec_list and
 * then patch sizem1 into cmd0. Early-return lines are elided in this
 * extract; code untouched.
 */
1220 static __rte_always_inline void
1221 cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
1222 uint64x2_t *cmd1, const uint8_t segdw,
1223 const uint32_t flags)
1225 union nix_send_hdr_w0_u sh;
1226 union nix_send_sg_s sg;
1228 if (m->nb_segs == 1) {
1229 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1230 sg.u = vgetq_lane_u64(cmd1[0], 0);
1231 sg.u |= (cnxk_nix_prefree_seg(m) << 55);
1232 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1235 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1236 sg.u = vgetq_lane_u64(cmd1[0], 0);
1237 if (!(sg.u & (1ULL << 55)))
1238 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1244 sh.u = vgetq_lane_u64(cmd0[0], 0);
1245 sg.u = vgetq_lane_u64(cmd1[0], 0);
1247 cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
1249 sh.sizem1 = segdw - 1;
1250 cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
1251 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
/* Number of packets processed per iteration of the vector Tx loop. */
1254 #define NIX_DESCS_PER_LOOP 4
/* Pack up to NIX_DESCS_PER_LOOP multi-seg descriptors into LMT lines,
 * pairing packets into the same line when their combined dword counts
 * fit (<= 8 dwords / 128B). Records each line's size-minus-1 into
 * data128 at *shift and returns the number of LMT lines used. Loop
 * increments, `off` computations and the lmt_addr advances between
 * iterations are elided in this extract; code untouched.
 */
1256 static __rte_always_inline uint8_t
1257 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1258 uint64x2_t *cmd1, uint64x2_t *cmd2,
1259 uint64x2_t *cmd3, uint8_t *segdw,
1260 uint64_t *lmt_addr, __uint128_t *data128,
1261 uint8_t *shift, const uint16_t flags)
1263 uint8_t j, off, lmt_used;
/* Fast path: no ext header / tstamp -- 4 packets can share one line
 * when their total dwords fit in 8.
 */
1265 if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1266 !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1267 /* No segments in 4 consecutive packets. */
1268 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1269 for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1270 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1273 vst1q_u64(lmt_addr, cmd0[0]);
1274 vst1q_u64(lmt_addr + 2, cmd1[0]);
1275 vst1q_u64(lmt_addr + 4, cmd0[1]);
1276 vst1q_u64(lmt_addr + 6, cmd1[1]);
1277 vst1q_u64(lmt_addr + 8, cmd0[2]);
1278 vst1q_u64(lmt_addr + 10, cmd1[2]);
1279 vst1q_u64(lmt_addr + 12, cmd0[3]);
1280 vst1q_u64(lmt_addr + 14, cmd1[3]);
1282 *data128 |= ((__uint128_t)7) << *shift;
1290 for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1291 /* Fit consecutive packets in same LMTLINE. */
1292 if ((segdw[j] + segdw[j + 1]) <= 8) {
1293 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1294 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1297 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1300 segdw[j + 1], flags);
1301 /* TSTAMP takes 4 each, no segs. */
1302 vst1q_u64(lmt_addr, cmd0[j]);
1303 vst1q_u64(lmt_addr + 2, cmd2[j]);
1304 vst1q_u64(lmt_addr + 4, cmd1[j]);
1305 vst1q_u64(lmt_addr + 6, cmd3[j]);
1307 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1308 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1309 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1310 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1311 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1312 /* EXT header take 3 each, space for 2 segs.*/
1313 cn10k_nix_prepare_mseg_vec(mbufs[j],
1317 vst1q_u64(lmt_addr, cmd0[j]);
1318 vst1q_u64(lmt_addr + 2, cmd2[j]);
1319 vst1q_u64(lmt_addr + 4, cmd1[j]);
1322 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1323 lmt_addr + 12 + off,
1326 segdw[j + 1], flags);
1327 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1328 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1329 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1331 cn10k_nix_prepare_mseg_vec(mbufs[j],
1335 vst1q_u64(lmt_addr, cmd0[j]);
1336 vst1q_u64(lmt_addr + 2, cmd1[j]);
1339 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1343 segdw[j + 1], flags);
1344 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1345 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1347 *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
/* Packet pair does not fit -- place this packet alone on a line. */
1352 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1353 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1354 cn10k_nix_prepare_mseg_vec(mbufs[j],
1358 vst1q_u64(lmt_addr, cmd0[j]);
1359 vst1q_u64(lmt_addr + 2, cmd2[j]);
1360 vst1q_u64(lmt_addr + 4, cmd1[j]);
1363 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1364 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1365 cn10k_nix_prepare_mseg_vec(mbufs[j],
1369 vst1q_u64(lmt_addr, cmd0[j]);
1370 vst1q_u64(lmt_addr + 2, cmd2[j]);
1371 vst1q_u64(lmt_addr + 4, cmd1[j]);
1373 cn10k_nix_prepare_mseg_vec(mbufs[j],
1377 vst1q_u64(lmt_addr, cmd0[j]);
1378 vst1q_u64(lmt_addr + 2, cmd1[j]);
1380 *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
/* Compute the next LMT write address for a descriptor of @dw (16 B) dwords.
 *
 * If the descriptor would overflow the current 128 B LMTLINE, the finished
 * line's dword count minus one ((*loff >> 4) - 1) is folded into @data128
 * at bit offset @shift (3 bits per line) and @shift is advanced.
 * NOTE(review): the *lnum increment / *loff reset for the new line is not
 * visible in this excerpt — verify against full source.
 *
 * Outputs: @next = write address within line *lnum at byte offset *loff;
 * @loff is then advanced by the descriptor size in bytes (dw << 4).
 */
1391 static __rte_always_inline void
1392 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1393 uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1395 /* Go to next line if we are out of space */
1396 if ((*loff + (dw << 4)) > 128) {
1397 *data128 = *data128 |
1398 (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1399 *shift = *shift + 3;
1404 *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1405 *loff = *loff + (dw << 4);
/* Store one packet's prepared send descriptor words (cmd0..cmd3) to the LMT
 * area at @laddr, laying the words out according to the enabled offload
 * @flags: SEND_HDR (cmd0), SEND_SG (cmd1), SEND_EXT (cmd2, when
 * NIX_TX_NEED_EXT_HDR) and SEND_MEM for timestamp (cmd3, when
 * NIX_TX_OFFLOAD_TSTAMP_F).  For multi-seg packets, the SG list is built
 * in place after the header words via cn10k_nix_prepare_mseg_vec().
 */
1408 static __rte_always_inline void
1409 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1410 uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1411 uint64x2_t cmd3, const uint16_t flags)
1415 /* Handle no fast free when security is enabled without mseg */
1416 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1417 (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1418 !(flags & NIX_TX_MULTI_SEG_F)) {
1419 union nix_send_sg_s sg;
1421 sg.u = vgetq_lane_u64(cmd1, 0);
/* Bit 55 of SEND_SG: set when cnxk_nix_prefree_seg() indicates the
 * buffer must not be freed by hardware — presumably refcnt > 1 or
 * external buffer; confirm against cnxk_nix_prefree_seg().
 */
1422 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1423 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1425 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
/* If HW will free the buffer (bit 55 clear), mark the mempool
 * object as "put" for cookie debugging.
 */
1426 sg.u = vgetq_lane_u64(cmd1, 0);
1427 if (!(sg.u & (1ULL << 55)))
1428 RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1433 if (flags & NIX_TX_MULTI_SEG_F) {
1434 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1435 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
/* Layout: HDR(0) EXT(16) SG(32) seglist(48..) MEM(48+off). */
1436 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1437 &cmd0, &cmd1, segdw, flags);
1438 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1439 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1440 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1443 vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1444 } else if (flags & NIX_TX_NEED_EXT_HDR) {
/* Layout: HDR(0) EXT(16) SG(32) seglist(48..). */
1445 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1446 &cmd0, &cmd1, segdw, flags);
1447 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1448 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1449 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
/* Layout without EXT: HDR(0) SG(16) seglist(32..). */
1451 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1452 &cmd0, &cmd1, segdw, flags);
1453 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1454 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1456 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1457 /* Store the prepared send desc to LMT lines */
1458 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1459 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1460 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1461 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1462 vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1464 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1465 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1466 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1469 /* Store the prepared send desc to LMT lines */
1470 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1471 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1475 static __rte_always_inline uint16_t
1476 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1477 uint16_t pkts, uint64_t *cmd, uintptr_t base,
1478 const uint16_t flags)
1480 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1481 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1482 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1483 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1484 uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1485 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1486 uint64x2_t senddesc01_w0, senddesc23_w0;
1487 uint64x2_t senddesc01_w1, senddesc23_w1;
1488 uint64x2_t sendext01_w0, sendext23_w0;
1489 uint64x2_t sendext01_w1, sendext23_w1;
1490 uint64x2_t sendmem01_w0, sendmem23_w0;
1491 uint64x2_t sendmem01_w1, sendmem23_w1;
1492 uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1493 uint64x2_t sgdesc01_w0, sgdesc23_w0;
1494 uint64x2_t sgdesc01_w1, sgdesc23_w1;
1495 struct cn10k_eth_txq *txq = tx_queue;
1496 rte_iova_t io_addr = txq->io_addr;
1497 uintptr_t laddr = txq->lmt_base;
1498 uint8_t c_lnum, c_shft, c_loff;
1499 uint64x2_t ltypes01, ltypes23;
1500 uint64x2_t xtmp128, ytmp128;
1501 uint64x2_t xmask01, xmask23;
1502 uintptr_t c_laddr = laddr;
1503 uint8_t lnum, shift, loff;
1504 rte_iova_t c_io_addr;
1507 __uint128_t data128;
1511 if (!(flags & NIX_TX_VWQE_F)) {
1512 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1513 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1514 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1515 /* Reduce the cached count */
1516 txq->fc_cache_pkts -= pkts;
1518 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1519 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1522 /* Perform header writes before barrier for TSO */
1523 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1524 for (i = 0; i < pkts; i++)
1525 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1528 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1529 senddesc23_w0 = senddesc01_w0;
1530 senddesc01_w1 = vdupq_n_u64(0);
1531 senddesc23_w1 = senddesc01_w1;
1532 sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1533 sgdesc23_w0 = sgdesc01_w0;
1535 /* Load command defaults into vector variables. */
1536 if (flags & NIX_TX_NEED_EXT_HDR) {
1537 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1538 sendext23_w0 = sendext01_w0;
1539 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1540 sendext23_w1 = sendext01_w1;
1541 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1542 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1543 sendmem23_w0 = sendmem01_w0;
1544 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1545 sendmem23_w1 = sendmem01_w1;
1549 /* Get LMT base address and LMT ID as lcore id */
1550 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1551 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1552 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1553 c_io_addr = txq->cpt_io_addr;
1554 sa_base = txq->sa_base;
1559 /* Number of packets to prepare depends on offloads enabled. */
1560 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1561 cn10k_nix_pkts_per_vec_brst(flags) :
1563 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1568 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1575 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1576 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1581 if (flags & NIX_TX_MULTI_SEG_F) {
1584 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1585 struct rte_mbuf *m = tx_pkts[j];
1587 /* Get dwords based on nb_segs. */
1588 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1589 /* Add dwords based on offloads. */
1590 segdw[j] += 1 + /* SEND HDR */
1591 !!(flags & NIX_TX_NEED_EXT_HDR) +
1592 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1595 /* Check if there are enough LMTLINES for this loop */
1596 if (lnum + 4 > 32) {
1597 uint8_t ldwords_con = 0, lneeded = 0;
1598 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1599 ldwords_con += segdw[j];
1600 if (ldwords_con > 8) {
1602 ldwords_con = segdw[j];
1606 if (lnum + lneeded > 32) {
1612 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1614 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1615 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1617 senddesc23_w0 = senddesc01_w0;
1618 sgdesc23_w0 = sgdesc01_w0;
1620 /* Clear vlan enables. */
1621 if (flags & NIX_TX_NEED_EXT_HDR) {
1622 sendext01_w1 = vbicq_u64(sendext01_w1,
1623 vdupq_n_u64(0x3FFFF00FFFF00));
1624 sendext23_w1 = sendext01_w1;
1627 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1628 /* Reset send mem alg to SETTSTMP from SUB*/
1629 sendmem01_w0 = vbicq_u64(sendmem01_w0,
1630 vdupq_n_u64(BIT_ULL(59)));
1631 /* Reset send mem address to default. */
1633 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1634 sendmem23_w0 = sendmem01_w0;
1635 sendmem23_w1 = sendmem01_w1;
1638 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1639 /* Clear the LSO enable bit. */
1640 sendext01_w0 = vbicq_u64(sendext01_w0,
1641 vdupq_n_u64(BIT_ULL(14)));
1642 sendext23_w0 = sendext01_w0;
1645 /* Move mbufs to iova */
1646 mbuf0 = (uint64_t *)tx_pkts[0];
1647 mbuf1 = (uint64_t *)tx_pkts[1];
1648 mbuf2 = (uint64_t *)tx_pkts[2];
1649 mbuf3 = (uint64_t *)tx_pkts[3];
1651 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1652 offsetof(struct rte_mbuf, buf_iova));
1653 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1654 offsetof(struct rte_mbuf, buf_iova));
1655 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1656 offsetof(struct rte_mbuf, buf_iova));
1657 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1658 offsetof(struct rte_mbuf, buf_iova));
1660 * Get mbuf's, olflags, iova, pktlen, dataoff
1661 * dataoff_iovaX.D[0] = iova,
1662 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1663 * len_olflagsX.D[0] = ol_flags,
1664 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1666 dataoff_iova0 = vld1q_u64(mbuf0);
1667 len_olflags0 = vld1q_u64(mbuf0 + 2);
1668 dataoff_iova1 = vld1q_u64(mbuf1);
1669 len_olflags1 = vld1q_u64(mbuf1 + 2);
1670 dataoff_iova2 = vld1q_u64(mbuf2);
1671 len_olflags2 = vld1q_u64(mbuf2 + 2);
1672 dataoff_iova3 = vld1q_u64(mbuf3);
1673 len_olflags3 = vld1q_u64(mbuf3 + 2);
1675 /* Move mbufs to point pool */
1676 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1677 offsetof(struct rte_mbuf, pool) -
1678 offsetof(struct rte_mbuf, buf_iova));
1679 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1680 offsetof(struct rte_mbuf, pool) -
1681 offsetof(struct rte_mbuf, buf_iova));
1682 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1683 offsetof(struct rte_mbuf, pool) -
1684 offsetof(struct rte_mbuf, buf_iova));
1685 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1686 offsetof(struct rte_mbuf, pool) -
1687 offsetof(struct rte_mbuf, buf_iova));
1689 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1690 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1691 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1693 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1694 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1697 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1698 : [a] "+w"(senddesc01_w1)
1699 : [in] "r"(mbuf0 + 2)
1702 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1703 : [a] "+w"(senddesc01_w1)
1704 : [in] "r"(mbuf1 + 2)
1707 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1708 : [b] "+w"(senddesc23_w1)
1709 : [in] "r"(mbuf2 + 2)
1712 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1713 : [b] "+w"(senddesc23_w1)
1714 : [in] "r"(mbuf3 + 2)
1717 /* Get pool pointer alone */
1718 mbuf0 = (uint64_t *)*mbuf0;
1719 mbuf1 = (uint64_t *)*mbuf1;
1720 mbuf2 = (uint64_t *)*mbuf2;
1721 mbuf3 = (uint64_t *)*mbuf3;
1723 /* Get pool pointer alone */
1724 mbuf0 = (uint64_t *)*mbuf0;
1725 mbuf1 = (uint64_t *)*mbuf1;
1726 mbuf2 = (uint64_t *)*mbuf2;
1727 mbuf3 = (uint64_t *)*mbuf3;
1730 const uint8x16_t shuf_mask2 = {
1731 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1732 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1734 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1735 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1737 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1738 const uint64x2_t and_mask0 = {
1743 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1744 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1745 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1746 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1749 * Pick only 16 bits of pktlen preset at bits 63:32
1750 * and place them at bits 15:0.
1752 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1753 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1755 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1756 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1757 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1759 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1760 * pktlen at 15:0 position.
1762 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1763 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1764 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1765 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1767 /* Move mbuf to point to pool_id. */
1768 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1769 offsetof(struct rte_mempool, pool_id));
1770 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1771 offsetof(struct rte_mempool, pool_id));
1772 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1773 offsetof(struct rte_mempool, pool_id));
1774 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1775 offsetof(struct rte_mempool, pool_id));
1777 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1778 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1780 * Lookup table to translate ol_flags to
1781 * il3/il4 types. But we still use ol3/ol4 types in
1782 * senddesc_w1 as only one header processing is enabled.
1784 const uint8x16_t tbl = {
1785 /* [0-15] = il4type:il3type */
1786 0x04, /* none (IPv6 assumed) */
1787 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1788 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1789 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1790 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1791 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1792 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1793 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1794 0x02, /* RTE_MBUF_F_TX_IPV4 */
1795 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1796 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1797 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1798 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1799 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1800 * RTE_MBUF_F_TX_TCP_CKSUM
1802 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1803 * RTE_MBUF_F_TX_SCTP_CKSUM
1805 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1806 * RTE_MBUF_F_TX_UDP_CKSUM
1810 /* Extract olflags to translate to iltypes */
1811 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1812 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1815 * E(47):L3_LEN(9):L2_LEN(7+z)
1816 * E(47):L3_LEN(9):L2_LEN(7+z)
1818 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1819 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1821 /* Move OLFLAGS bits 55:52 to 51:48
1822 * with zeros preprended on the byte and rest
1825 xtmp128 = vshrq_n_u8(xtmp128, 4);
1826 ytmp128 = vshrq_n_u8(ytmp128, 4);
1828 * E(48):L3_LEN(8):L2_LEN(z+7)
1829 * E(48):L3_LEN(8):L2_LEN(z+7)
1831 const int8x16_t tshft3 = {
1832 -1, 0, 8, 8, 8, 8, 8, 8,
1833 -1, 0, 8, 8, 8, 8, 8, 8,
1836 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1837 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1840 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1841 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1843 /* Pick only relevant fields i.e Bit 48:55 of iltype
1844 * and place it in ol3/ol4type of senddesc_w1
1846 const uint8x16_t shuf_mask0 = {
1847 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1848 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1851 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1852 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1854 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1855 * a [E(32):E(16):OL3(8):OL2(8)]
1857 * a [E(32):E(16):(OL3+OL2):OL2]
1858 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1860 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1861 vshlq_n_u16(senddesc01_w1, 8));
1862 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1863 vshlq_n_u16(senddesc23_w1, 8));
1865 /* Move ltypes to senddesc*_w1 */
1866 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1867 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1868 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1869 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1871 * Lookup table to translate ol_flags to
1875 const uint8x16_t tbl = {
1876 /* [0-15] = ol4type:ol3type */
1878 0x03, /* OUTER_IP_CKSUM */
1879 0x02, /* OUTER_IPV4 */
1880 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1881 0x04, /* OUTER_IPV6 */
1882 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1883 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1884 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1887 0x00, /* OUTER_UDP_CKSUM */
1888 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1889 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1890 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1893 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1894 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1897 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1900 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1901 * OUTER_IPV4 | OUTER_IP_CKSUM
1905 /* Extract olflags to translate to iltypes */
1906 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1907 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1910 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1911 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1913 const uint8x16_t shuf_mask5 = {
1914 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1915 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1917 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1918 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1920 /* Extract outer ol flags only */
1921 const uint64x2_t o_cksum_mask = {
1926 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1927 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1929 /* Extract OUTER_UDP_CKSUM bit 41 and
1933 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1934 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1936 /* Shift oltype by 2 to start nibble from BIT(56)
1937 * instead of BIT(58)
1939 xtmp128 = vshrq_n_u8(xtmp128, 2);
1940 ytmp128 = vshrq_n_u8(ytmp128, 2);
1942 * E(48):L3_LEN(8):L2_LEN(z+7)
1943 * E(48):L3_LEN(8):L2_LEN(z+7)
1945 const int8x16_t tshft3 = {
1946 -1, 0, 8, 8, 8, 8, 8, 8,
1947 -1, 0, 8, 8, 8, 8, 8, 8,
1950 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1951 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1954 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1955 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1957 /* Pick only relevant fields i.e Bit 56:63 of oltype
1958 * and place it in ol3/ol4type of senddesc_w1
1960 const uint8x16_t shuf_mask0 = {
1961 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1962 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1965 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1966 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1968 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1969 * a [E(32):E(16):OL3(8):OL2(8)]
1971 * a [E(32):E(16):(OL3+OL2):OL2]
1972 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1974 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1975 vshlq_n_u16(senddesc01_w1, 8));
1976 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1977 vshlq_n_u16(senddesc23_w1, 8));
1979 /* Move ltypes to senddesc*_w1 */
1980 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1981 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1982 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1983 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1984 /* Lookup table to translate ol_flags to
1985 * ol4type, ol3type, il4type, il3type of senddesc_w1
1987 const uint8x16x2_t tbl = {{
1989 /* [0-15] = il4type:il3type */
1990 0x04, /* none (IPv6) */
1991 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1992 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1993 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1994 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1995 0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1996 * RTE_MBUF_F_TX_TCP_CKSUM
1998 0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
1999 * RTE_MBUF_F_TX_SCTP_CKSUM
2001 0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2002 * RTE_MBUF_F_TX_UDP_CKSUM
2004 0x02, /* RTE_MBUF_F_TX_IPV4 */
2005 0x12, /* RTE_MBUF_F_TX_IPV4 |
2006 * RTE_MBUF_F_TX_TCP_CKSUM
2008 0x22, /* RTE_MBUF_F_TX_IPV4 |
2009 * RTE_MBUF_F_TX_SCTP_CKSUM
2011 0x32, /* RTE_MBUF_F_TX_IPV4 |
2012 * RTE_MBUF_F_TX_UDP_CKSUM
2014 0x03, /* RTE_MBUF_F_TX_IPV4 |
2015 * RTE_MBUF_F_TX_IP_CKSUM
2017 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2018 * RTE_MBUF_F_TX_TCP_CKSUM
2020 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2021 * RTE_MBUF_F_TX_SCTP_CKSUM
2023 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2024 * RTE_MBUF_F_TX_UDP_CKSUM
2029 /* [16-31] = ol4type:ol3type */
2031 0x03, /* OUTER_IP_CKSUM */
2032 0x02, /* OUTER_IPV4 */
2033 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2034 0x04, /* OUTER_IPV6 */
2035 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2036 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2037 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2040 0x00, /* OUTER_UDP_CKSUM */
2041 0x33, /* OUTER_UDP_CKSUM |
2044 0x32, /* OUTER_UDP_CKSUM |
2047 0x33, /* OUTER_UDP_CKSUM |
2048 * OUTER_IPV4 | OUTER_IP_CKSUM
2050 0x34, /* OUTER_UDP_CKSUM |
2053 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2056 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2059 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2060 * OUTER_IPV4 | OUTER_IP_CKSUM
2065 /* Extract olflags to translate to oltype & iltype */
2066 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2067 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2070 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2071 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2073 const uint32x4_t tshft_4 = {
2079 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2080 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2083 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2084 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2086 const uint8x16_t shuf_mask5 = {
2087 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2088 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2090 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2091 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2093 /* Extract outer and inner header ol_flags */
2094 const uint64x2_t oi_cksum_mask = {
2099 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2100 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2102 /* Extract OUTER_UDP_CKSUM bit 41 and
2106 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2107 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2109 /* Shift right oltype by 2 and iltype by 4
2110 * to start oltype nibble from BIT(58)
2111 * instead of BIT(56) and iltype nibble from BIT(48)
2112 * instead of BIT(52).
2114 const int8x16_t tshft5 = {
2115 8, 8, 8, 8, 8, 8, -4, -2,
2116 8, 8, 8, 8, 8, 8, -4, -2,
2119 xtmp128 = vshlq_u8(xtmp128, tshft5);
2120 ytmp128 = vshlq_u8(ytmp128, tshft5);
2122 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2123 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2125 const int8x16_t tshft3 = {
2126 -1, 0, -1, 0, 0, 0, 0, 0,
2127 -1, 0, -1, 0, 0, 0, 0, 0,
2130 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2131 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2133 /* Mark Bit(4) of oltype */
2134 const uint64x2_t oi_cksum_mask2 = {
2139 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2140 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2143 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2144 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2146 /* Pick only relevant fields i.e Bit 48:55 of iltype and
2147 * Bit 56:63 of oltype and place it in corresponding
2148 * place in senddesc_w1.
2150 const uint8x16_t shuf_mask0 = {
2151 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2152 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2155 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2156 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2158 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2159 * l3len, l2len, ol3len, ol2len.
2160 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2162 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2164 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2165 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2167 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2168 vshlq_n_u32(senddesc01_w1, 8));
2169 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2170 vshlq_n_u32(senddesc23_w1, 8));
2172 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2173 senddesc01_w1 = vaddq_u8(
2174 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2175 senddesc23_w1 = vaddq_u8(
2176 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2178 /* Move ltypes to senddesc*_w1 */
2179 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2180 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2183 xmask01 = vdupq_n_u64(0);
2185 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2190 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2195 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2200 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2204 xmask01 = vshlq_n_u64(xmask01, 20);
2205 xmask23 = vshlq_n_u64(xmask23, 20);
2207 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2208 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2210 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2211 /* Tx ol_flag for vlan. */
2212 const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2213 /* Bit enable for VLAN1 */
2214 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2215 /* Tx ol_flag for QnQ. */
2216 const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2217 /* Bit enable for VLAN0 */
2218 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2219 /* Load vlan values from packet. outer is VLAN 0 */
2220 uint64x2_t ext01 = {
2221 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2222 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2223 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2224 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2226 uint64x2_t ext23 = {
2227 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2228 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2229 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2230 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2233 /* Get ol_flags of the packets. */
2234 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2235 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2237 /* ORR vlan outer/inner values into cmd. */
2238 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2239 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2241 /* Test for offload enable bits and generate masks. */
2242 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2244 vandq_u64(vtstq_u64(xtmp128, olq),
2246 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2248 vandq_u64(vtstq_u64(ytmp128, olq),
2251 /* Set vlan enable bits into cmd based on mask. */
2252 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2253 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2256 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2257 /* Tx ol_flag for timestam. */
2258 const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2259 RTE_MBUF_F_TX_IEEE1588_TMST};
2260 /* Set send mem alg to SUB. */
2261 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2262 /* Increment send mem address by 8. */
2263 const uint64x2_t addr = {0x8, 0x8};
2265 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2266 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2268 /* Check if timestamp is requested and generate inverted
2269 * mask as we need not make any changes to default cmd
2272 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2273 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2275 /* Change send mem address to an 8 byte offset when
2276 * TSTMP is disabled.
2278 sendmem01_w1 = vaddq_u64(sendmem01_w1,
2279 vandq_u64(xtmp128, addr));
2280 sendmem23_w1 = vaddq_u64(sendmem23_w1,
2281 vandq_u64(ytmp128, addr));
2282 /* Change send mem alg to SUB when TSTMP is disabled. */
2283 sendmem01_w0 = vorrq_u64(sendmem01_w0,
2284 vandq_u64(xtmp128, alg));
2285 sendmem23_w0 = vorrq_u64(sendmem23_w0,
2286 vandq_u64(ytmp128, alg));
2288 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2289 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2290 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2291 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2294 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2295 const uint64_t lso_fmt = txq->lso_tun_fmt;
2296 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2297 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2299 /* Extract SD W1 as we need to set L4 types. */
2300 vst1q_u64(sd_w1, senddesc01_w1);
2301 vst1q_u64(sd_w1 + 2, senddesc23_w1);
2303 /* Extract SX W0 as we need to set LSO fields. */
2304 vst1q_u64(sx_w0, sendext01_w0);
2305 vst1q_u64(sx_w0 + 2, sendext23_w0);
2307 /* Extract ol_flags. */
2308 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2309 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2311 /* Prepare individual mbufs. */
2312 cn10k_nix_prepare_tso(tx_pkts[0],
2313 (union nix_send_hdr_w1_u *)&sd_w1[0],
2314 (union nix_send_ext_w0_u *)&sx_w0[0],
2315 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2317 cn10k_nix_prepare_tso(tx_pkts[1],
2318 (union nix_send_hdr_w1_u *)&sd_w1[1],
2319 (union nix_send_ext_w0_u *)&sx_w0[1],
2320 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2322 cn10k_nix_prepare_tso(tx_pkts[2],
2323 (union nix_send_hdr_w1_u *)&sd_w1[2],
2324 (union nix_send_ext_w0_u *)&sx_w0[2],
2325 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2327 cn10k_nix_prepare_tso(tx_pkts[3],
2328 (union nix_send_hdr_w1_u *)&sd_w1[3],
2329 (union nix_send_ext_w0_u *)&sx_w0[3],
2330 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2332 senddesc01_w1 = vld1q_u64(sd_w1);
2333 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2335 sendext01_w0 = vld1q_u64(sx_w0);
2336 sendext23_w0 = vld1q_u64(sx_w0 + 2);
2339 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2340 !(flags & NIX_TX_MULTI_SEG_F) &&
2341 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2342 /* Set don't free bit if reference count > 1 */
2343 xmask01 = vdupq_n_u64(0);
2346 /* Move mbufs to iova */
2347 mbuf0 = (uint64_t *)tx_pkts[0];
2348 mbuf1 = (uint64_t *)tx_pkts[1];
2349 mbuf2 = (uint64_t *)tx_pkts[2];
2350 mbuf3 = (uint64_t *)tx_pkts[3];
2352 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2353 vsetq_lane_u64(0x80000, xmask01, 0);
2355 RTE_MEMPOOL_CHECK_COOKIES(
2356 ((struct rte_mbuf *)mbuf0)->pool,
2357 (void **)&mbuf0, 1, 0);
2359 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2360 vsetq_lane_u64(0x80000, xmask01, 1);
2362 RTE_MEMPOOL_CHECK_COOKIES(
2363 ((struct rte_mbuf *)mbuf1)->pool,
2364 (void **)&mbuf1, 1, 0);
2366 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2367 vsetq_lane_u64(0x80000, xmask23, 0);
2369 RTE_MEMPOOL_CHECK_COOKIES(
2370 ((struct rte_mbuf *)mbuf2)->pool,
2371 (void **)&mbuf2, 1, 0);
2373 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2374 vsetq_lane_u64(0x80000, xmask23, 1);
2376 RTE_MEMPOOL_CHECK_COOKIES(
2377 ((struct rte_mbuf *)mbuf3)->pool,
2378 (void **)&mbuf3, 1, 0);
2379 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2380 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2381 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2382 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2383 /* Move mbufs to iova */
2384 mbuf0 = (uint64_t *)tx_pkts[0];
2385 mbuf1 = (uint64_t *)tx_pkts[1];
2386 mbuf2 = (uint64_t *)tx_pkts[2];
2387 mbuf3 = (uint64_t *)tx_pkts[3];
2389 /* Mark mempool object as "put" since
2390 * it is freed by NIX
2392 RTE_MEMPOOL_CHECK_COOKIES(
2393 ((struct rte_mbuf *)mbuf0)->pool,
2394 (void **)&mbuf0, 1, 0);
2396 RTE_MEMPOOL_CHECK_COOKIES(
2397 ((struct rte_mbuf *)mbuf1)->pool,
2398 (void **)&mbuf1, 1, 0);
2400 RTE_MEMPOOL_CHECK_COOKIES(
2401 ((struct rte_mbuf *)mbuf2)->pool,
2402 (void **)&mbuf2, 1, 0);
2404 RTE_MEMPOOL_CHECK_COOKIES(
2405 ((struct rte_mbuf *)mbuf3)->pool,
2406 (void **)&mbuf3, 1, 0);
2409 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
2410 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2411 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2412 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2413 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2415 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2416 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2417 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2418 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2420 if (flags & NIX_TX_NEED_EXT_HDR) {
2421 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2422 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2423 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2424 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2427 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2428 const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2429 RTE_MBUF_F_TX_SEC_OFFLOAD};
2433 /* Extract ol_flags. */
2434 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2435 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2437 xtmp128 = vtstq_u64(olf, xtmp128);
2438 ytmp128 = vtstq_u64(olf, ytmp128);
2441 dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2442 if (vgetq_lane_u64(xtmp128, 0))
2443 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2444 &cmd1[0], &next, c_laddr,
2446 &c_shft, sa_base, flags);
2448 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2449 &shift, &wd.data128, &next);
2451 /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2452 cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2453 cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2457 dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2458 if (vgetq_lane_u64(xtmp128, 1))
2459 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2460 &cmd1[1], &next, c_laddr,
2462 &c_shft, sa_base, flags);
2464 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2465 &shift, &wd.data128, &next);
2467 /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2468 cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2469 cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2473 dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2474 if (vgetq_lane_u64(ytmp128, 0))
2475 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2476 &cmd1[2], &next, c_laddr,
2478 &c_shft, sa_base, flags);
2480 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2481 &shift, &wd.data128, &next);
2483 /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2484 cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2485 cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2489 dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2490 if (vgetq_lane_u64(ytmp128, 1))
2491 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2492 &cmd1[3], &next, c_laddr,
2494 &c_shft, sa_base, flags);
2496 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2497 &shift, &wd.data128, &next);
2499 /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2500 cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2501 cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2504 } else if (flags & NIX_TX_MULTI_SEG_F) {
2508 j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2511 LMT_OFF(laddr, lnum,
2513 &wd.data128, &shift,
2516 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2517 /* Store the prepared send desc to LMT lines */
2518 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2519 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2520 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2521 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2522 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2523 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2524 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2525 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2526 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2528 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2529 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2530 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2531 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2532 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2533 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2534 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2535 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2537 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2538 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2539 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2540 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2541 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2542 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2544 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2545 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2546 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2547 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2548 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2549 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2553 /* Store the prepared send desc to LMT lines */
2554 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2555 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2556 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2557 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2558 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2559 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2560 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2561 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2565 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2568 /* Roundup lnum to last line if it is partial */
2569 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2570 lnum = lnum + !!loff;
2571 wd.data128 = wd.data128 |
2572 (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2575 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2578 if (flags & NIX_TX_VWQE_F)
2579 roc_sso_hws_head_wait(base);
2583 /* Submit CPT instructions if any */
2584 if (flags & NIX_TX_OFFLOAD_SECURITY_F)
2585 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2590 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2591 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2593 pa = io_addr | (wd.data[0] & 0x7) << 4;
2594 wd.data[0] &= ~0x7ULL;
2596 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2599 wd.data[0] |= (15ULL << 12);
2600 wd.data[0] |= (uint64_t)lmt_id;
2603 roc_lmt_submit_steorl(wd.data[0], pa);
2605 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2606 wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2608 pa = io_addr | (wd.data[1] & 0x7) << 4;
2609 wd.data[1] &= ~0x7ULL;
2611 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2614 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2615 wd.data[1] |= (uint64_t)(lmt_id + 16);
2618 roc_lmt_submit_steorl(wd.data[1], pa);
2620 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2621 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2623 pa = io_addr | (wd.data[0] & 0x7) << 4;
2624 wd.data[0] &= ~0x7ULL;
2626 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2629 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2630 wd.data[0] |= lmt_id;
2633 roc_lmt_submit_steorl(wd.data[0], pa);
2640 if (unlikely(scalar)) {
2641 if (flags & NIX_TX_MULTI_SEG_F)
2642 pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
2646 pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
2654 static __rte_always_inline uint16_t
2655 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
2656 uint16_t pkts, uint64_t *cmd, uintptr_t base,
2657 const uint16_t flags)
2659 RTE_SET_USED(tx_queue);
2660 RTE_SET_USED(tx_pkts);
2663 RTE_SET_USED(flags);
/* Short aliases for the Tx offload flag bits; they exist only to keep the
 * NIX_TX_FASTPATH_MODES table below readable.
 */
2669 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
2670 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2671 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
2672 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
2673 #define TSO_F NIX_TX_OFFLOAD_TSO_F
2674 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
2675 #define T_SEC_F NIX_TX_OFFLOAD_SECURITY_F
2677 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
/* X-macro table of all 128 Tx fast path variants, one T() entry per
 * combination of the 7 offload flag bits, ordered as
 * [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM].
 * Fields: T(name, f6..f0 flag bits, lmt-line size in dwords, flag mask).
 * Size rule visible from the table: 8 dwords when TSP is set, 6 when TSO
 * or VLAN is set, 4 otherwise.
 */
#define NIX_TX_FASTPATH_MODES                                                  \
T(no_offload,				0, 0, 0, 0, 0, 0, 0, 4,                \
		NIX_TX_OFFLOAD_NONE)                                           \
T(l3l4csum,				0, 0, 0, 0, 0, 0, 1, 4,                \
		L3L4CSUM_F)                                                    \
T(ol3ol4csum,				0, 0, 0, 0, 0, 1, 0, 4,                \
		OL3OL4CSUM_F)                                                  \
T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 0, 1, 1, 4,                \
		OL3OL4CSUM_F | L3L4CSUM_F)                                     \
T(vlan,					0, 0, 0, 0, 1, 0, 0, 6,                \
		VLAN_F)                                                        \
T(vlan_l3l4csum,			0, 0, 0, 0, 1, 0, 1, 6,                \
		VLAN_F | L3L4CSUM_F)                                           \
T(vlan_ol3ol4csum,			0, 0, 0, 0, 1, 1, 0, 6,                \
		VLAN_F | OL3OL4CSUM_F)                                         \
T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 0, 1, 1, 1, 6,                \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                            \
T(noff,					0, 0, 0, 1, 0, 0, 0, 4,                \
		NOFF_F)                                                        \
T(noff_l3l4csum,			0, 0, 0, 1, 0, 0, 1, 4,                \
		NOFF_F | L3L4CSUM_F)                                           \
T(noff_ol3ol4csum,			0, 0, 0, 1, 0, 1, 0, 4,                \
		NOFF_F | OL3OL4CSUM_F)                                         \
T(noff_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 0, 1, 1, 4,                \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                            \
T(noff_vlan,				0, 0, 0, 1, 1, 0, 0, 6,                \
		NOFF_F | VLAN_F)                                               \
T(noff_vlan_l3l4csum,			0, 0, 0, 1, 1, 0, 1, 6,                \
		NOFF_F | VLAN_F | L3L4CSUM_F)                                  \
T(noff_vlan_ol3ol4csum,			0, 0, 0, 1, 1, 1, 0, 6,                \
		NOFF_F | VLAN_F | OL3OL4CSUM_F)                                \
T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 0, 1, 1, 1, 1, 6,                \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                   \
T(tso,					0, 0, 1, 0, 0, 0, 0, 6,                \
		TSO_F)                                                         \
T(tso_l3l4csum,				0, 0, 1, 0, 0, 0, 1, 6,                \
		TSO_F | L3L4CSUM_F)                                            \
T(tso_ol3ol4csum,			0, 0, 1, 0, 0, 1, 0, 6,                \
		TSO_F | OL3OL4CSUM_F)                                          \
T(tso_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 0, 1, 1, 6,                \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                             \
T(tso_vlan,				0, 0, 1, 0, 1, 0, 0, 6,                \
		TSO_F | VLAN_F)                                                \
T(tso_vlan_l3l4csum,			0, 0, 1, 0, 1, 0, 1, 6,                \
		TSO_F | VLAN_F | L3L4CSUM_F)                                   \
T(tso_vlan_ol3ol4csum,			0, 0, 1, 0, 1, 1, 0, 6,                \
		TSO_F | VLAN_F | OL3OL4CSUM_F)                                 \
T(tso_vlan_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1, 1, 6,                \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(tso_noff,				0, 0, 1, 1, 0, 0, 0, 6,                \
		TSO_F | NOFF_F)                                                \
T(tso_noff_l3l4csum,			0, 0, 1, 1, 0, 0, 1, 6,                \
		TSO_F | NOFF_F | L3L4CSUM_F)                                   \
T(tso_noff_ol3ol4csum,			0, 0, 1, 1, 0, 1, 0, 6,                \
		TSO_F | NOFF_F | OL3OL4CSUM_F)                                 \
T(tso_noff_ol3ol4csum_l3l4csum,		0, 0, 1, 1, 0, 1, 1, 6,                \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(tso_noff_vlan,			0, 0, 1, 1, 1, 0, 0, 6,                \
		TSO_F | NOFF_F | VLAN_F)                                       \
T(tso_noff_vlan_l3l4csum,		0, 0, 1, 1, 1, 0, 1, 6,                \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                          \
T(tso_noff_vlan_ol3ol4csum,		0, 0, 1, 1, 1, 1, 0, 6,                \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                        \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1, 1, 6,                \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(ts,					0, 1, 0, 0, 0, 0, 0, 8,                \
		TSP_F)                                                         \
T(ts_l3l4csum,				0, 1, 0, 0, 0, 0, 1, 8,                \
		TSP_F | L3L4CSUM_F)                                            \
T(ts_ol3ol4csum,			0, 1, 0, 0, 0, 1, 0, 8,                \
		TSP_F | OL3OL4CSUM_F)                                          \
T(ts_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 0, 1, 1, 8,                \
		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                             \
T(ts_vlan,				0, 1, 0, 0, 1, 0, 0, 8,                \
		TSP_F | VLAN_F)                                                \
T(ts_vlan_l3l4csum,			0, 1, 0, 0, 1, 0, 1, 8,                \
		TSP_F | VLAN_F | L3L4CSUM_F)                                   \
T(ts_vlan_ol3ol4csum,			0, 1, 0, 0, 1, 1, 0, 8,                \
		TSP_F | VLAN_F | OL3OL4CSUM_F)                                 \
T(ts_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1, 1, 8,                \
		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(ts_noff,				0, 1, 0, 1, 0, 0, 0, 8,                \
		TSP_F | NOFF_F)                                                \
T(ts_noff_l3l4csum,			0, 1, 0, 1, 0, 0, 1, 8,                \
		TSP_F | NOFF_F | L3L4CSUM_F)                                   \
T(ts_noff_ol3ol4csum,			0, 1, 0, 1, 0, 1, 0, 8,                \
		TSP_F | NOFF_F | OL3OL4CSUM_F)                                 \
T(ts_noff_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 0, 1, 1, 8,                \
		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(ts_noff_vlan,				0, 1, 0, 1, 1, 0, 0, 8,                \
		TSP_F | NOFF_F | VLAN_F)                                       \
T(ts_noff_vlan_l3l4csum,		0, 1, 0, 1, 1, 0, 1, 8,                \
		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                          \
T(ts_noff_vlan_ol3ol4csum,		0, 1, 0, 1, 1, 1, 0, 8,                \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                        \
T(ts_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 0, 1, 1, 1, 1, 8,                \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(ts_tso,				0, 1, 1, 0, 0, 0, 0, 8,                \
		TSP_F | TSO_F)                                                 \
T(ts_tso_l3l4csum,			0, 1, 1, 0, 0, 0, 1, 8,                \
		TSP_F | TSO_F | L3L4CSUM_F)                                    \
T(ts_tso_ol3ol4csum,			0, 1, 1, 0, 0, 1, 0, 8,                \
		TSP_F | TSO_F | OL3OL4CSUM_F)                                  \
T(ts_tso_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 0, 1, 1, 8,                \
		TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(ts_tso_vlan,				0, 1, 1, 0, 1, 0, 0, 8,                \
		TSP_F | TSO_F | VLAN_F)                                        \
T(ts_tso_vlan_l3l4csum,			0, 1, 1, 0, 1, 0, 1, 8,                \
		TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                           \
T(ts_tso_vlan_ol3ol4csum,		0, 1, 1, 0, 1, 1, 0, 8,                \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                         \
T(ts_tso_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 0, 1, 1, 1, 8,                \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(ts_tso_noff,				0, 1, 1, 1, 0, 0, 0, 8,                \
		TSP_F | TSO_F | NOFF_F)                                        \
T(ts_tso_noff_l3l4csum,			0, 1, 1, 1, 0, 0, 1, 8,                \
		TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                           \
T(ts_tso_noff_ol3ol4csum,		0, 1, 1, 1, 0, 1, 0, 8,                \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                         \
T(ts_tso_noff_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 0, 1, 1, 8,                \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(ts_tso_noff_vlan,			0, 1, 1, 1, 1, 0, 0, 8,                \
		TSP_F | TSO_F | NOFF_F | VLAN_F)                               \
T(ts_tso_noff_vlan_l3l4csum,		0, 1, 1, 1, 1, 0, 1, 8,                \
		TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                  \
T(ts_tso_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 1, 0, 8,                \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1, 1, 8,                \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
T(sec,					1, 0, 0, 0, 0, 0, 0, 4,                \
		T_SEC_F)                                                       \
T(sec_l3l4csum,				1, 0, 0, 0, 0, 0, 1, 4,                \
		T_SEC_F | L3L4CSUM_F)                                          \
T(sec_ol3ol4csum,			1, 0, 0, 0, 0, 1, 0, 4,                \
		T_SEC_F | OL3OL4CSUM_F)                                        \
T(sec_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 0, 1, 1, 4,                \
		T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
T(sec_vlan,				1, 0, 0, 0, 1, 0, 0, 6,                \
		T_SEC_F | VLAN_F)                                              \
T(sec_vlan_l3l4csum,			1, 0, 0, 0, 1, 0, 1, 6,                \
		T_SEC_F | VLAN_F | L3L4CSUM_F)                                 \
T(sec_vlan_ol3ol4csum,			1, 0, 0, 0, 1, 1, 0, 6,                \
		T_SEC_F | VLAN_F | OL3OL4CSUM_F)                               \
T(sec_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1, 1, 6,                \
		T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
T(sec_noff,				1, 0, 0, 1, 0, 0, 0, 4,                \
		T_SEC_F | NOFF_F)                                              \
T(sec_noff_l3l4csum,			1, 0, 0, 1, 0, 0, 1, 4,                \
		T_SEC_F | NOFF_F | L3L4CSUM_F)                                 \
T(sec_noff_ol3ol4csum,			1, 0, 0, 1, 0, 1, 0, 4,                \
		T_SEC_F | NOFF_F | OL3OL4CSUM_F)                               \
T(sec_noff_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 0, 1, 1, 4,                \
		T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
T(sec_noff_vlan,			1, 0, 0, 1, 1, 0, 0, 6,                \
		T_SEC_F | NOFF_F | VLAN_F)                                     \
T(sec_noff_vlan_l3l4csum,		1, 0, 0, 1, 1, 0, 1, 6,                \
		T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
T(sec_noff_vlan_ol3ol4csum,		1, 0, 0, 1, 1, 1, 0, 6,                \
		T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
T(sec_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 0, 1, 1, 1, 1, 6,                \
		T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)         \
T(sec_tso,				1, 0, 1, 0, 0, 0, 0, 6,                \
		T_SEC_F | TSO_F)                                               \
T(sec_tso_l3l4csum,			1, 0, 1, 0, 0, 0, 1, 6,                \
		T_SEC_F | TSO_F | L3L4CSUM_F)                                  \
T(sec_tso_ol3ol4csum,			1, 0, 1, 0, 0, 1, 0, 6,                \
		T_SEC_F | TSO_F | OL3OL4CSUM_F)                                \
T(sec_tso_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 0, 1, 1, 6,                \
		T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                   \
T(sec_tso_vlan,				1, 0, 1, 0, 1, 0, 0, 6,                \
		T_SEC_F | TSO_F | VLAN_F)                                      \
T(sec_tso_vlan_l3l4csum,		1, 0, 1, 0, 1, 0, 1, 6,                \
		T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)                         \
T(sec_tso_vlan_ol3ol4csum,		1, 0, 1, 0, 1, 1, 0, 6,                \
		T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                       \
T(sec_tso_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 0, 1, 1, 1, 6,                \
		T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)          \
T(sec_tso_noff,				1, 0, 1, 1, 0, 0, 0, 6,                \
		T_SEC_F | TSO_F | NOFF_F)                                      \
T(sec_tso_noff_l3l4csum,		1, 0, 1, 1, 0, 0, 1, 6,                \
		T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)                         \
T(sec_tso_noff_ol3ol4csum,		1, 0, 1, 1, 0, 1, 0, 6,                \
		T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                       \
T(sec_tso_noff_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 0, 1, 1, 6,                \
		T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)          \
T(sec_tso_noff_vlan,			1, 0, 1, 1, 1, 0, 0, 6,                \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F)                             \
T(sec_tso_noff_vlan_l3l4csum,		1, 0, 1, 1, 1, 0, 1, 6,                \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                \
T(sec_tso_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 1, 0, 6,                \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)              \
T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 1, 6,               \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(sec_ts,				1, 1, 0, 0, 0, 0, 0, 8,                \
		T_SEC_F | TSP_F)                                               \
T(sec_ts_l3l4csum,			1, 1, 0, 0, 0, 0, 1, 8,                \
		T_SEC_F | TSP_F | L3L4CSUM_F)                                  \
T(sec_ts_ol3ol4csum,			1, 1, 0, 0, 0, 1, 0, 8,                \
		T_SEC_F | TSP_F | OL3OL4CSUM_F)                                \
T(sec_ts_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 0, 1, 1, 8,                \
		T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                   \
T(sec_ts_vlan,				1, 1, 0, 0, 1, 0, 0, 8,                \
		T_SEC_F | TSP_F | VLAN_F)                                      \
T(sec_ts_vlan_l3l4csum,			1, 1, 0, 0, 1, 0, 1, 8,                \
		T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)                         \
T(sec_ts_vlan_ol3ol4csum,		1, 1, 0, 0, 1, 1, 0, 8,                \
		T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)                       \
T(sec_ts_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 0, 1, 1, 1, 8,                \
		T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)          \
T(sec_ts_noff,				1, 1, 0, 1, 0, 0, 0, 8,                \
		T_SEC_F | TSP_F | NOFF_F)                                      \
T(sec_ts_noff_l3l4csum,			1, 1, 0, 1, 0, 0, 1, 8,                \
		T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)                         \
T(sec_ts_noff_ol3ol4csum,		1, 1, 0, 1, 0, 1, 0, 8,                \
		T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)                       \
T(sec_ts_noff_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 0, 1, 1, 8,                \
		T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)          \
T(sec_ts_noff_vlan,			1, 1, 0, 1, 1, 0, 0, 8,                \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F)                             \
T(sec_ts_noff_vlan_l3l4csum,		1, 1, 0, 1, 1, 0, 1, 8,                \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                \
T(sec_ts_noff_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 1, 0, 8,                \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)              \
T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1, 1, 8,                \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(sec_ts_tso,				1, 1, 1, 0, 0, 0, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F)                                       \
T(sec_ts_tso_l3l4csum,			1, 1, 1, 0, 0, 0, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)                          \
T(sec_ts_tso_ol3ol4csum,		1, 1, 1, 0, 0, 1, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)                        \
T(sec_ts_tso_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 0, 1, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(sec_ts_tso_vlan,			1, 1, 1, 0, 1, 0, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | VLAN_F)                              \
T(sec_ts_tso_vlan_l3l4csum,		1, 1, 1, 0, 1, 0, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                 \
T(sec_ts_tso_vlan_ol3ol4csum,		1, 1, 1, 0, 1, 1, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)               \
T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)  \
T(sec_ts_tso_noff,			1, 1, 1, 1, 0, 0, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F)                              \
T(sec_ts_tso_noff_l3l4csum,		1, 1, 1, 1, 0, 0, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                 \
T(sec_ts_tso_noff_ol3ol4csum,		1, 1, 1, 1, 0, 1, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)               \
T(sec_ts_tso_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 0, 1, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)  \
T(sec_ts_tso_noff_vlan,			1, 1, 1, 1, 1, 0, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)                     \
T(sec_ts_tso_noff_vlan_l3l4csum,	1, 1, 1, 1, 1, 0, 1, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)        \
T(sec_ts_tso_noff_vlan_ol3ol4csum,	1, 1, 1, 1, 1, 1, 0, 8,                \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)      \
T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 1, 8,            \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |     \
		L3L4CSUM_F)
2937 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \
2938 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \
2939 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2941 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name( \
2942 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2944 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
2945 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2947 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
2948 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2950 NIX_TX_FASTPATH_MODES
2953 #endif /* __CN10K_TX_H__ */