1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #include <rte_eventdev.h>
11 #define NIX_TX_OFFLOAD_NONE (0)
12 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
13 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
14 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
15 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
16 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
17 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
18 #define NIX_TX_OFFLOAD_SECURITY_F BIT(6)
19 #define NIX_TX_OFFLOAD_MAX (NIX_TX_OFFLOAD_SECURITY_F << 1)
21 /* Flags to control xmit_prepare function.
22 * Defining it from backwards to denote its been
23 * not used as offload flags to pick function
25 #define NIX_TX_VWQE_F BIT(14)
26 #define NIX_TX_MULTI_SEG_F BIT(15)
28 #define NIX_TX_NEED_SEND_HDR_W1 \
29 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
30 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
32 #define NIX_TX_NEED_EXT_HDR \
33 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
36 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
38 /* Cached value is low, Update the fc_cache_pkts */ \
39 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
40 /* Multiply with sqe_per_sqb to express in pkts */ \
41 (txq)->fc_cache_pkts = \
42 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
43 << (txq)->sqes_per_sqb_log2; \
44 /* Check it again for the room */ \
45 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
50 /* Encoded number of segments to number of dwords macro, each value of nb_segs
51 * is encoded as 4bits.
53 #define NIX_SEGDW_MAGIC 0x76654432210ULL
55 #define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
57 /* Function to determine no of tx subdesc required in case ext
58 * sub desc is enabled.
60 static __rte_always_inline int
61 cn10k_nix_tx_ext_subs(const uint16_t flags)
63 return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
66 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
71 static __rte_always_inline uint8_t
72 cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
74 if (!(flags & NIX_TX_MULTI_SEG_F))
75 return cn10k_nix_tx_ext_subs(flags) + 2;
77 /* Already everything is accounted for in segdw */
81 static __rte_always_inline uint8_t
82 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
84 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
85 << ROC_LMT_LINES_PER_CORE_LOG2;
88 static __rte_always_inline uint8_t
89 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
91 return (flags & NIX_TX_NEED_EXT_HDR) ?
92 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
96 static __rte_always_inline uint64_t
97 cn10k_nix_tx_steor_data(const uint16_t flags)
99 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
102 /* This will be moved to addr area */
104 /* 15 vector sizes for single seg */
124 static __rte_always_inline uint8_t
125 cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
127 return ((flags & NIX_TX_NEED_EXT_HDR) ?
128 (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
132 static __rte_always_inline uint64_t
133 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
135 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
138 /* This will be moved to addr area */
140 /* 15 vector sizes for single seg */
160 static __rte_always_inline uint64_t
161 cn10k_cpt_tx_steor_data(void)
163 /* We have two CPT instructions per LMTLine */
164 const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
167 /* This will be moved to addr area */
188 static __rte_always_inline void
189 cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
190 const uint16_t flags)
193 cmd[0] = txq->send_hdr_w0;
197 /* Send ext if present */
198 if (flags & NIX_TX_NEED_EXT_HDR) {
199 *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
208 static __rte_always_inline void
209 cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
210 uint8_t loff, uint8_t shft)
215 /* Check if there is any CPT instruction to submit */
219 data = cn10k_cpt_tx_steor_data();
220 /* Update lmtline use for partial end line */
222 data &= ~(0x7ULL << shft);
223 /* Update it to half full i.e 64B */
224 data |= (0x3UL << shft);
227 pa = io_addr | ((data >> 16) & 0x7) << 4;
228 data &= ~(0x7ULL << 16);
229 /* Update lines - 1 that contain valid data */
230 data |= ((uint64_t)(lnum + loff - 1)) << 12;
234 roc_lmt_submit_steorl(data, pa);
237 #if defined(RTE_ARCH_ARM64)
238 static __rte_always_inline void
239 cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
240 uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
241 uint8_t *loff, uint8_t *shft, uint64_t sa_base,
242 const uint16_t flags)
244 struct cn10k_sec_sess_priv sess_priv;
245 uint32_t pkt_len, dlen_adj, rlen;
246 uint64x2_t cmd01, cmd23;
247 uintptr_t dptr, nixtx;
248 uint64_t ucode_cmd[4];
254 sess_priv.u64 = *rte_security_dynfield(m);
256 if (flags & NIX_TX_NEED_SEND_HDR_W1)
257 l2_len = vgetq_lane_u8(*cmd0, 8);
262 dptr = vgetq_lane_u64(*cmd1, 1);
263 pkt_len = vgetq_lane_u16(*cmd0, 0);
265 /* Calculate dlen adj */
266 dlen_adj = pkt_len - l2_len;
267 rlen = (dlen_adj + sess_priv.roundup_len) +
268 (sess_priv.roundup_byte - 1);
269 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
270 rlen += sess_priv.partial_len;
271 dlen_adj = rlen - dlen_adj;
273 /* Update send descriptors. Security is single segment only */
274 *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
275 *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
277 /* Get area where NIX descriptor needs to be stored */
278 nixtx = dptr + pkt_len + dlen_adj;
280 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
282 /* Return nixtx addr */
283 *nixtx_addr = (nixtx + 16);
285 /* DLEN passed is excluding L2HDR */
287 tag = sa_base & 0xFFFFUL;
288 sa_base &= ~0xFFFFUL;
289 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
290 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
292 (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);
294 /* CPT Word 0 and Word 1 */
295 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
296 /* CPT_RES_S is 16B above NIXTX */
297 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
299 /* CPT word 2 and 3 */
300 cmd23 = vdupq_n_u64(0);
301 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
302 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
303 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
307 if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
308 if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
309 *((uint16_t *)(dptr - 2)) =
310 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
312 *((uint16_t *)(dptr - 2)) =
313 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
319 /* Move to our line */
320 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
322 /* Write CPT instruction to lmt line */
323 vst1q_u64(laddr, cmd01);
324 vst1q_u64((laddr + 2), cmd23);
326 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
327 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
329 /* Move to next line for every other CPT inst */
331 *lnum = *lnum + (*loff ? 0 : 1);
332 *shft = *shft + (*loff ? 0 : 3);
335 static __rte_always_inline void
336 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
337 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
338 uint64_t sa_base, const uint16_t flags)
340 struct cn10k_sec_sess_priv sess_priv;
341 uint32_t pkt_len, dlen_adj, rlen;
342 struct nix_send_hdr_s *send_hdr;
343 uint64x2_t cmd01, cmd23;
344 union nix_send_sg_s *sg;
345 uintptr_t dptr, nixtx;
346 uint64_t ucode_cmd[4];
352 /* Move to our line from base */
353 sess_priv.u64 = *rte_security_dynfield(m);
354 send_hdr = (struct nix_send_hdr_s *)cmd;
355 if (flags & NIX_TX_NEED_EXT_HDR)
356 sg = (union nix_send_sg_s *)&cmd[4];
358 sg = (union nix_send_sg_s *)&cmd[2];
360 if (flags & NIX_TX_NEED_SEND_HDR_W1)
361 l2_len = cmd[1] & 0xFF;
366 dptr = *(uint64_t *)(sg + 1);
367 pkt_len = send_hdr->w0.total;
369 /* Calculate dlen adj */
370 dlen_adj = pkt_len - l2_len;
371 rlen = (dlen_adj + sess_priv.roundup_len) +
372 (sess_priv.roundup_byte - 1);
373 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
374 rlen += sess_priv.partial_len;
375 dlen_adj = rlen - dlen_adj;
377 /* Update send descriptors. Security is single segment only */
378 send_hdr->w0.total = pkt_len + dlen_adj;
379 sg->seg1_size = pkt_len + dlen_adj;
381 /* Get area where NIX descriptor needs to be stored */
382 nixtx = dptr + pkt_len + dlen_adj;
384 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
386 /* Return nixtx addr */
387 *nixtx_addr = (nixtx + 16);
389 /* DLEN passed is excluding L2HDR */
391 tag = sa_base & 0xFFFFUL;
392 sa_base &= ~0xFFFFUL;
393 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
394 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
396 (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);
398 /* CPT Word 0 and Word 1. Assume no multi-seg support */
399 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
400 /* CPT_RES_S is 16B above NIXTX */
401 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
403 /* CPT word 2 and 3 */
404 cmd23 = vdupq_n_u64(0);
405 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
406 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
407 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
411 if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
412 if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
413 *((uint16_t *)(dptr - 2)) =
414 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
416 *((uint16_t *)(dptr - 2)) =
417 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
422 /* Move to our line */
423 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
425 /* Write CPT instruction to lmt line */
426 vst1q_u64(laddr, cmd01);
427 vst1q_u64((laddr + 2), cmd23);
429 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
430 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
432 /* Move to next line for every other CPT inst */
434 *lnum = *lnum + (*loff ? 0 : 1);
435 *shft = *shft + (*loff ? 0 : 3);
440 static __rte_always_inline void
441 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
442 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
443 uint64_t sa_base, const uint16_t flags)
447 RTE_SET_USED(nixtx_addr);
452 RTE_SET_USED(sa_base);
457 static __rte_always_inline void
458 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
460 uint64_t mask, ol_flags = m->ol_flags;
462 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
463 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
464 uint16_t *iplen, *oiplen, *oudplen;
465 uint16_t lso_sb, paylen;
467 mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
468 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
469 m->l2_len + m->l3_len + m->l4_len;
471 /* Reduce payload len from base headers */
472 paylen = m->pkt_len - lso_sb;
474 /* Get iplen position assuming no tunnel hdr */
475 iplen = (uint16_t *)(mdata + m->l2_len +
476 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
477 /* Handle tunnel tso */
478 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
479 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
480 const uint8_t is_udp_tun =
481 (CNXK_NIX_UDP_TUN_BITMASK >>
482 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
485 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
487 RTE_MBUF_F_TX_OUTER_IPV6)));
488 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
491 /* Update format for UDP tunneled packet */
493 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
494 m->outer_l3_len + 4);
495 *oudplen = rte_cpu_to_be_16(
496 rte_be_to_cpu_16(*oudplen) - paylen);
499 /* Update iplen position to inner ip hdr */
500 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
502 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
505 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
509 static __rte_always_inline void
510 cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
511 const uint64_t lso_tun_fmt, bool *sec)
513 struct nix_send_ext_s *send_hdr_ext;
514 struct nix_send_hdr_s *send_hdr;
515 uint64_t ol_flags = 0, mask;
516 union nix_send_hdr_w1_u w1;
517 union nix_send_sg_s *sg;
519 send_hdr = (struct nix_send_hdr_s *)cmd;
520 if (flags & NIX_TX_NEED_EXT_HDR) {
521 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
522 sg = (union nix_send_sg_s *)(cmd + 4);
523 /* Clear previous markings */
524 send_hdr_ext->w0.lso = 0;
525 send_hdr_ext->w1.u = 0;
527 sg = (union nix_send_sg_s *)(cmd + 2);
530 if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
531 ol_flags = m->ol_flags;
535 if (!(flags & NIX_TX_MULTI_SEG_F))
536 send_hdr->w0.total = m->data_len;
538 send_hdr->w0.total = m->pkt_len;
539 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
543 * 3 => IPV4 with csum
545 * L3type and L3ptr needs to be set for either
546 * L3 csum or L4 csum or LSO
550 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
551 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
552 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
553 const uint8_t ol3type =
554 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
555 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
556 !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
559 w1.ol3type = ol3type;
560 mask = 0xffffull << ((!!ol3type) << 4);
561 w1.ol3ptr = ~mask & m->outer_l2_len;
562 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
565 w1.ol4type = csum + (csum << 1);
568 w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
569 ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
570 w1.il3ptr = w1.ol4ptr + m->l2_len;
571 w1.il4ptr = w1.il3ptr + m->l3_len;
572 /* Increment it by 1 if it is IPV4 as 3 is with csum */
573 w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
576 w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
578 /* In case of no tunnel header use only
579 * shift IL3/IL4 fields a bit to use
580 * OL3/OL4 for header checksum
583 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
584 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
586 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
587 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
588 const uint8_t outer_l2_len = m->outer_l2_len;
591 w1.ol3ptr = outer_l2_len;
592 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
593 /* Increment it by 1 if it is IPV4 as 3 is with csum */
594 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
595 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
596 !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
599 w1.ol4type = csum + (csum << 1);
601 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
602 const uint8_t l2_len = m->l2_len;
604 /* Always use OLXPTR and OLXTYPE when only
605 * when one header is present
610 w1.ol4ptr = l2_len + m->l3_len;
611 /* Increment it by 1 if it is IPV4 as 3 is with csum */
612 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
613 ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
614 !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
617 w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
620 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
621 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
622 /* HW will update ptr after vlan0 update */
623 send_hdr_ext->w1.vlan1_ins_ptr = 12;
624 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
626 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
627 /* 2B before end of l2 header */
628 send_hdr_ext->w1.vlan0_ins_ptr = 12;
629 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
632 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
636 mask = -(!w1.il3type);
637 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
639 send_hdr_ext->w0.lso_sb = lso_sb;
640 send_hdr_ext->w0.lso = 1;
641 send_hdr_ext->w0.lso_mps = m->tso_segsz;
642 send_hdr_ext->w0.lso_format =
643 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
644 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
646 /* Handle tunnel tso */
647 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
648 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
649 const uint8_t is_udp_tun =
650 (CNXK_NIX_UDP_TUN_BITMASK >>
651 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
653 uint8_t shift = is_udp_tun ? 32 : 0;
655 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
656 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
658 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
659 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
660 /* Update format for UDP tunneled packet */
661 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
665 if (flags & NIX_TX_NEED_SEND_HDR_W1)
666 send_hdr->w1.u = w1.u;
668 if (!(flags & NIX_TX_MULTI_SEG_F)) {
669 sg->seg1_size = send_hdr->w0.total;
670 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
672 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
673 /* DF bit = 1 if refcount of current mbuf or parent mbuf
675 * DF bit = 0 otherwise
677 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
679 /* Mark mempool object as "put" since it is freed by NIX */
680 if (!send_hdr->w0.df)
681 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
683 sg->seg1_size = m->data_len;
684 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
686 /* NOFF is handled later for multi-seg */
689 if (flags & NIX_TX_OFFLOAD_SECURITY_F)
690 *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
693 static __rte_always_inline void
694 cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
695 const uint16_t flags)
697 struct nix_send_ext_s *send_hdr_ext;
698 union nix_send_sg_s *sg;
700 /* With minimal offloads, 'cmd' being local could be optimized out to
701 * registers. In other cases, 'cmd' will be in stack. Intent is
702 * 'cmd' stores content from txq->cmd which is copied only once.
704 *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
706 if (flags & NIX_TX_NEED_EXT_HDR) {
707 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
708 *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
711 sg = (union nix_send_sg_s *)(cmd + 4);
713 sg = (union nix_send_sg_s *)(cmd + 2);
715 /* In case of multi-seg, sg template is stored here */
716 *((union nix_send_sg_s *)lmt_addr) = *sg;
717 *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
720 static __rte_always_inline void
721 cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
722 const uint64_t ol_flags, const uint16_t no_segdw,
723 const uint16_t flags)
725 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
726 const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
727 struct nix_send_ext_s *send_hdr_ext =
728 (struct nix_send_ext_s *)lmt_addr + 16;
729 uint64_t *lmt = (uint64_t *)lmt_addr;
730 uint16_t off = (no_segdw - 1) << 1;
731 struct nix_send_mem_s *send_mem;
733 send_mem = (struct nix_send_mem_s *)(lmt + off);
734 send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
735 send_hdr_ext->w0.tstmp = 1;
736 if (flags & NIX_TX_MULTI_SEG_F) {
737 /* Retrieving the default desc values */
740 /* Using compiler barrier to avoid violation of C
743 rte_compiler_barrier();
746 /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
747 * should not be recorded, hence changing the alg type to
748 * NIX_SENDMEMALG_SET and also changing send mem addr field to
749 * next 8 bytes as it corrupts the actual Tx tstamp registered
752 send_mem->w0.subdc = NIX_SUBDC_MEM;
753 send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
755 (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
759 static __rte_always_inline uint16_t
760 cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
762 struct nix_send_hdr_s *send_hdr;
763 union nix_send_sg_s *sg;
764 struct rte_mbuf *m_next;
765 uint64_t *slist, sg_u;
770 send_hdr = (struct nix_send_hdr_s *)cmd;
772 if (flags & NIX_TX_NEED_EXT_HDR)
777 sg = (union nix_send_sg_s *)&cmd[2 + off];
779 /* Start from second segment, first segment is already there */
782 nb_segs = m->nb_segs - 1;
784 slist = &cmd[3 + off + 1];
786 /* Set invert df if buffer is not to be freed by H/W */
787 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
788 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
790 /* Mark mempool object as "put" since it is freed by NIX */
791 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
792 if (!(sg_u & (1ULL << 55)))
793 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
800 /* Fill mbuf segments */
803 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
804 *slist = rte_mbuf_data_iova(m);
805 /* Set invert df if buffer is not to be freed by H/W */
806 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
807 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
808 /* Mark mempool object as "put" since it is freed by NIX
810 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
811 if (!(sg_u & (1ULL << (i + 55))))
812 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
817 if (i > 2 && nb_segs) {
819 /* Next SG subdesc */
820 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
823 sg = (union nix_send_sg_s *)slist;
833 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
834 /* Roundup extra dwords to multiple of 2 */
835 segdw = (segdw >> 1) + (segdw & 0x1);
837 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
838 send_hdr->w0.sizem1 = segdw - 1;
843 static __rte_always_inline uint16_t
844 cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
845 uint64_t *cmd, uintptr_t base, const uint16_t flags)
847 struct cn10k_eth_txq *txq = tx_queue;
848 const rte_iova_t io_addr = txq->io_addr;
849 uint8_t lnum, c_lnum, c_shft, c_loff;
850 uintptr_t pa, lbase = txq->lmt_base;
851 uint16_t lmt_id, burst, left, i;
852 uintptr_t c_lbase = lbase;
853 rte_iova_t c_io_addr;
854 uint64_t lso_tun_fmt;
861 if (!(flags & NIX_TX_VWQE_F)) {
862 NIX_XMIT_FC_OR_RETURN(txq, pkts);
863 /* Reduce the cached count */
864 txq->fc_cache_pkts -= pkts;
867 /* Get cmd skeleton */
868 cn10k_nix_tx_skeleton(txq, cmd, flags);
870 if (flags & NIX_TX_OFFLOAD_TSO_F)
871 lso_tun_fmt = txq->lso_tun_fmt;
873 /* Get LMT base address and LMT ID as lcore id */
874 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
875 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
876 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
877 c_io_addr = txq->cpt_io_addr;
878 sa_base = txq->sa_base;
883 burst = left > 32 ? 32 : left;
886 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
892 for (i = 0; i < burst; i++) {
893 /* Perform header writes for TSO, barrier at
894 * lmt steorl will suffice.
896 if (flags & NIX_TX_OFFLOAD_TSO_F)
897 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
899 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
902 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
904 /* Prepare CPT instruction and get nixtx addr */
905 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
906 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
907 &c_lnum, &c_loff, &c_shft, sa_base,
910 /* Move NIX desc to LMT/NIXTX area */
911 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
912 cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
913 tx_pkts[i]->ol_flags, 4, flags);
914 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
918 if (flags & NIX_TX_VWQE_F)
919 roc_sso_hws_head_wait(base);
924 /* Submit CPT instructions if any */
925 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
926 /* Reduce pkts to be sent to CPT */
927 burst -= ((c_lnum << 1) + c_loff);
928 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
934 data = cn10k_nix_tx_steor_data(flags);
935 pa = io_addr | (data & 0x7) << 4;
937 data |= (15ULL << 12);
938 data |= (uint64_t)lmt_id;
941 roc_lmt_submit_steorl(data, pa);
943 data = cn10k_nix_tx_steor_data(flags);
944 pa = io_addr | (data & 0x7) << 4;
946 data |= ((uint64_t)(burst - 17)) << 12;
947 data |= (uint64_t)(lmt_id + 16);
950 roc_lmt_submit_steorl(data, pa);
952 data = cn10k_nix_tx_steor_data(flags);
953 pa = io_addr | (data & 0x7) << 4;
955 data |= ((uint64_t)(burst - 1)) << 12;
959 roc_lmt_submit_steorl(data, pa);
969 static __rte_always_inline uint16_t
970 cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
971 uint16_t pkts, uint64_t *cmd, uintptr_t base,
972 const uint16_t flags)
974 struct cn10k_eth_txq *txq = tx_queue;
975 uintptr_t pa0, pa1, lbase = txq->lmt_base;
976 const rte_iova_t io_addr = txq->io_addr;
977 uint16_t segdw, lmt_id, burst, left, i;
978 uint8_t lnum, c_lnum, c_loff;
979 uintptr_t c_lbase = lbase;
980 uint64_t data0, data1;
981 rte_iova_t c_io_addr;
982 uint64_t lso_tun_fmt;
983 uint8_t shft, c_shft;
990 NIX_XMIT_FC_OR_RETURN(txq, pkts);
992 cn10k_nix_tx_skeleton(txq, cmd, flags);
994 /* Reduce the cached count */
995 txq->fc_cache_pkts -= pkts;
997 if (flags & NIX_TX_OFFLOAD_TSO_F)
998 lso_tun_fmt = txq->lso_tun_fmt;
1000 /* Get LMT base address and LMT ID as lcore id */
1001 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
1002 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1003 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
1004 c_io_addr = txq->cpt_io_addr;
1005 sa_base = txq->sa_base;
1010 burst = left > 32 ? 32 : left;
1015 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1021 for (i = 0; i < burst; i++) {
1022 /* Perform header writes for TSO, barrier at
1023 * lmt steorl will suffice.
1025 if (flags & NIX_TX_OFFLOAD_TSO_F)
1026 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1028 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
1031 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
1033 /* Prepare CPT instruction and get nixtx addr */
1034 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
1035 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
1036 &c_lnum, &c_loff, &c_shft, sa_base,
1039 /* Move NIX desc to LMT/NIXTX area */
1040 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
1042 /* Store sg list directly on lmt line */
1043 segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
1045 cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
1046 tx_pkts[i]->ol_flags, segdw,
1048 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
1050 data128 |= (((__uint128_t)(segdw - 1)) << shft);
1055 if (flags & NIX_TX_VWQE_F)
1056 roc_sso_hws_head_wait(base);
1061 /* Submit CPT instructions if any */
1062 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1063 /* Reduce pkts to be sent to CPT */
1064 burst -= ((c_lnum << 1) + c_loff);
1065 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
1069 data0 = (uint64_t)data128;
1070 data1 = (uint64_t)(data128 >> 64);
1071 /* Make data0 similar to data1 */
1075 pa0 = io_addr | (data0 & 0x7) << 4;
1077 /* Move lmtst1..15 sz to bits 63:19 */
1079 data0 |= (15ULL << 12);
1080 data0 |= (uint64_t)lmt_id;
1083 roc_lmt_submit_steorl(data0, pa0);
1085 pa1 = io_addr | (data1 & 0x7) << 4;
1088 data1 |= ((uint64_t)(burst - 17)) << 12;
1089 data1 |= (uint64_t)(lmt_id + 16);
1092 roc_lmt_submit_steorl(data1, pa1);
1094 pa0 = io_addr | (data0 & 0x7) << 4;
1096 /* Move lmtst1..15 sz to bits 63:19 */
1098 data0 |= ((burst - 1) << 12);
1099 data0 |= (uint64_t)lmt_id;
1102 roc_lmt_submit_steorl(data0, pa0);
1112 #if defined(RTE_ARCH_ARM64)
1114 static __rte_always_inline void
1115 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
1116 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
1117 const uint64_t flags, const uint64_t lso_tun_fmt)
1122 if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
1125 mask = -(!w1->il3type);
1126 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
1129 w0->lso_sb = lso_sb;
1130 w0->lso_mps = m->tso_segsz;
1131 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
1132 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
1134 /* Handle tunnel tso */
1135 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
1136 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
1137 const uint8_t is_udp_tun =
1138 (CNXK_NIX_UDP_TUN_BITMASK >>
1139 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
1141 uint8_t shift = is_udp_tun ? 32 : 0;
1143 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
1144 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
1146 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
1147 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
1148 /* Update format for UDP tunneled packet */
1150 w0->lso_format = (lso_tun_fmt >> shift);
1154 static __rte_always_inline void
1155 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
1156 union nix_send_hdr_w0_u *sh,
1157 union nix_send_sg_s *sg, const uint32_t flags)
1159 struct rte_mbuf *m_next;
1160 uint64_t *slist, sg_u;
1164 sh->total = m->pkt_len;
1165 /* Clear sg->u header before use */
1166 sg->u &= 0xFC00000000000000;
1170 sg_u = sg_u | ((uint64_t)m->data_len);
1172 nb_segs = m->nb_segs - 1;
1175 /* Set invert df if buffer is not to be freed by H/W */
1176 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1177 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
1178 /* Mark mempool object as "put" since it is freed by NIX */
1179 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1180 if (!(sg_u & (1ULL << 55)))
1181 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1186 /* Fill mbuf segments */
1189 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
1190 *slist = rte_mbuf_data_iova(m);
1191 /* Set invert df if buffer is not to be freed by H/W */
1192 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1193 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
1194 /* Mark mempool object as "put" since it is freed by NIX
1196 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1197 if (!(sg_u & (1ULL << (i + 55))))
1198 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1204 if (i > 2 && nb_segs) {
1206 /* Next SG subdesc */
1207 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
1210 sg = (union nix_send_sg_s *)slist;
1221 static __rte_always_inline void
1222 cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
1223 uint64x2_t *cmd1, const uint8_t segdw,
1224 const uint32_t flags)
1226 union nix_send_hdr_w0_u sh;
1227 union nix_send_sg_s sg;
1229 if (m->nb_segs == 1) {
1230 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1231 sg.u = vgetq_lane_u64(cmd1[0], 0);
1232 sg.u |= (cnxk_nix_prefree_seg(m) << 55);
1233 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1236 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1237 sg.u = vgetq_lane_u64(cmd1[0], 0);
1238 if (!(sg.u & (1ULL << 55)))
1239 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1245 sh.u = vgetq_lane_u64(cmd0[0], 0);
1246 sg.u = vgetq_lane_u64(cmd1[0], 0);
1248 cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
1250 sh.sizem1 = segdw - 1;
1251 cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
1252 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1255 #define NIX_DESCS_PER_LOOP 4
1257 static __rte_always_inline uint8_t
1258 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1259 uint64x2_t *cmd1, uint64x2_t *cmd2,
1260 uint64x2_t *cmd3, uint8_t *segdw,
1261 uint64_t *lmt_addr, __uint128_t *data128,
1262 uint8_t *shift, const uint16_t flags)
1264 uint8_t j, off, lmt_used;
1266 if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1267 !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1268 /* No segments in 4 consecutive packets. */
1269 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1270 for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1271 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1274 vst1q_u64(lmt_addr, cmd0[0]);
1275 vst1q_u64(lmt_addr + 2, cmd1[0]);
1276 vst1q_u64(lmt_addr + 4, cmd0[1]);
1277 vst1q_u64(lmt_addr + 6, cmd1[1]);
1278 vst1q_u64(lmt_addr + 8, cmd0[2]);
1279 vst1q_u64(lmt_addr + 10, cmd1[2]);
1280 vst1q_u64(lmt_addr + 12, cmd0[3]);
1281 vst1q_u64(lmt_addr + 14, cmd1[3]);
1283 *data128 |= ((__uint128_t)7) << *shift;
1291 for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1292 /* Fit consecutive packets in same LMTLINE. */
1293 if ((segdw[j] + segdw[j + 1]) <= 8) {
1294 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1295 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1298 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1301 segdw[j + 1], flags);
1302 /* TSTAMP takes 4 each, no segs. */
1303 vst1q_u64(lmt_addr, cmd0[j]);
1304 vst1q_u64(lmt_addr + 2, cmd2[j]);
1305 vst1q_u64(lmt_addr + 4, cmd1[j]);
1306 vst1q_u64(lmt_addr + 6, cmd3[j]);
1308 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1309 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1310 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1311 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1312 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1313 /* EXT header take 3 each, space for 2 segs.*/
1314 cn10k_nix_prepare_mseg_vec(mbufs[j],
1318 vst1q_u64(lmt_addr, cmd0[j]);
1319 vst1q_u64(lmt_addr + 2, cmd2[j]);
1320 vst1q_u64(lmt_addr + 4, cmd1[j]);
1323 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1324 lmt_addr + 12 + off,
1327 segdw[j + 1], flags);
1328 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1329 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1330 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1332 cn10k_nix_prepare_mseg_vec(mbufs[j],
1336 vst1q_u64(lmt_addr, cmd0[j]);
1337 vst1q_u64(lmt_addr + 2, cmd1[j]);
1340 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1344 segdw[j + 1], flags);
1345 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1346 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1348 *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1353 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1354 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1355 cn10k_nix_prepare_mseg_vec(mbufs[j],
1359 vst1q_u64(lmt_addr, cmd0[j]);
1360 vst1q_u64(lmt_addr + 2, cmd2[j]);
1361 vst1q_u64(lmt_addr + 4, cmd1[j]);
1364 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1365 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1366 cn10k_nix_prepare_mseg_vec(mbufs[j],
1370 vst1q_u64(lmt_addr, cmd0[j]);
1371 vst1q_u64(lmt_addr + 2, cmd2[j]);
1372 vst1q_u64(lmt_addr + 4, cmd1[j]);
1374 cn10k_nix_prepare_mseg_vec(mbufs[j],
1378 vst1q_u64(lmt_addr, cmd0[j]);
1379 vst1q_u64(lmt_addr + 2, cmd1[j]);
1381 *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1392 static __rte_always_inline void
1393 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1394 uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1396 /* Go to next line if we are out of space */
1397 if ((*loff + (dw << 4)) > 128) {
1398 *data128 = *data128 |
1399 (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1400 *shift = *shift + 3;
1405 *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1406 *loff = *loff + (dw << 4);
1409 static __rte_always_inline void
1410 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1411 uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1412 uint64x2_t cmd3, const uint16_t flags)
1416 /* Handle no fast free when security is enabled without mseg */
1417 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1418 (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1419 !(flags & NIX_TX_MULTI_SEG_F)) {
1420 union nix_send_sg_s sg;
1422 sg.u = vgetq_lane_u64(cmd1, 0);
1423 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1424 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1426 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1427 sg.u = vgetq_lane_u64(cmd1, 0);
1428 if (!(sg.u & (1ULL << 55)))
1429 RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1434 if (flags & NIX_TX_MULTI_SEG_F) {
1435 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1436 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1437 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1438 &cmd0, &cmd1, segdw, flags);
1439 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1440 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1441 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1444 vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1445 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1446 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1447 &cmd0, &cmd1, segdw, flags);
1448 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1449 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1450 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1452 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1453 &cmd0, &cmd1, segdw, flags);
1454 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1455 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1457 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1458 /* Store the prepared send desc to LMT lines */
1459 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1460 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1461 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1462 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1463 vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1465 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1466 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1467 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1470 /* Store the prepared send desc to LMT lines */
1471 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1472 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1476 static __rte_always_inline uint16_t
1477 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1478 uint16_t pkts, uint64_t *cmd, uintptr_t base,
1479 const uint16_t flags)
1481 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1482 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1483 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1484 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1485 uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1486 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1487 uint64x2_t senddesc01_w0, senddesc23_w0;
1488 uint64x2_t senddesc01_w1, senddesc23_w1;
1489 uint64x2_t sendext01_w0, sendext23_w0;
1490 uint64x2_t sendext01_w1, sendext23_w1;
1491 uint64x2_t sendmem01_w0, sendmem23_w0;
1492 uint64x2_t sendmem01_w1, sendmem23_w1;
1493 uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1494 uint64x2_t sgdesc01_w0, sgdesc23_w0;
1495 uint64x2_t sgdesc01_w1, sgdesc23_w1;
1496 struct cn10k_eth_txq *txq = tx_queue;
1497 rte_iova_t io_addr = txq->io_addr;
1498 uintptr_t laddr = txq->lmt_base;
1499 uint8_t c_lnum, c_shft, c_loff;
1500 uint64x2_t ltypes01, ltypes23;
1501 uint64x2_t xtmp128, ytmp128;
1502 uint64x2_t xmask01, xmask23;
1503 uintptr_t c_laddr = laddr;
1504 uint8_t lnum, shift, loff;
1505 rte_iova_t c_io_addr;
1508 __uint128_t data128;
1512 if (!(flags & NIX_TX_VWQE_F)) {
1513 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1514 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1515 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1516 /* Reduce the cached count */
1517 txq->fc_cache_pkts -= pkts;
1519 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1520 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1523 /* Perform header writes before barrier for TSO */
1524 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1525 for (i = 0; i < pkts; i++)
1526 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1529 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1530 senddesc23_w0 = senddesc01_w0;
1531 senddesc01_w1 = vdupq_n_u64(0);
1532 senddesc23_w1 = senddesc01_w1;
1533 sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1534 sgdesc23_w0 = sgdesc01_w0;
1536 /* Load command defaults into vector variables. */
1537 if (flags & NIX_TX_NEED_EXT_HDR) {
1538 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1539 sendext23_w0 = sendext01_w0;
1540 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1541 sendext23_w1 = sendext01_w1;
1542 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1543 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1544 sendmem23_w0 = sendmem01_w0;
1545 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1546 sendmem23_w1 = sendmem01_w1;
1550 /* Get LMT base address and LMT ID as lcore id */
1551 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1552 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1553 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1554 c_io_addr = txq->cpt_io_addr;
1555 sa_base = txq->sa_base;
1560 /* Number of packets to prepare depends on offloads enabled. */
1561 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1562 cn10k_nix_pkts_per_vec_brst(flags) :
1564 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1569 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1576 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1577 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1582 if (flags & NIX_TX_MULTI_SEG_F) {
1585 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1586 struct rte_mbuf *m = tx_pkts[j];
1588 /* Get dwords based on nb_segs. */
1589 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1590 /* Add dwords based on offloads. */
1591 segdw[j] += 1 + /* SEND HDR */
1592 !!(flags & NIX_TX_NEED_EXT_HDR) +
1593 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1596 /* Check if there are enough LMTLINES for this loop */
1597 if (lnum + 4 > 32) {
1598 uint8_t ldwords_con = 0, lneeded = 0;
1599 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1600 ldwords_con += segdw[j];
1601 if (ldwords_con > 8) {
1603 ldwords_con = segdw[j];
1607 if (lnum + lneeded > 32) {
1613 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1615 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1616 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1618 senddesc23_w0 = senddesc01_w0;
1619 sgdesc23_w0 = sgdesc01_w0;
1621 /* Clear vlan enables. */
1622 if (flags & NIX_TX_NEED_EXT_HDR) {
1623 sendext01_w1 = vbicq_u64(sendext01_w1,
1624 vdupq_n_u64(0x3FFFF00FFFF00));
1625 sendext23_w1 = sendext01_w1;
1628 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1629 /* Reset send mem alg to SETTSTMP from SUB*/
1630 sendmem01_w0 = vbicq_u64(sendmem01_w0,
1631 vdupq_n_u64(BIT_ULL(59)));
1632 /* Reset send mem address to default. */
1634 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1635 sendmem23_w0 = sendmem01_w0;
1636 sendmem23_w1 = sendmem01_w1;
1639 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1640 /* Clear the LSO enable bit. */
1641 sendext01_w0 = vbicq_u64(sendext01_w0,
1642 vdupq_n_u64(BIT_ULL(14)));
1643 sendext23_w0 = sendext01_w0;
1646 /* Move mbufs to iova */
1647 mbuf0 = (uint64_t *)tx_pkts[0];
1648 mbuf1 = (uint64_t *)tx_pkts[1];
1649 mbuf2 = (uint64_t *)tx_pkts[2];
1650 mbuf3 = (uint64_t *)tx_pkts[3];
1652 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1653 offsetof(struct rte_mbuf, buf_iova));
1654 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1655 offsetof(struct rte_mbuf, buf_iova));
1656 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1657 offsetof(struct rte_mbuf, buf_iova));
1658 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1659 offsetof(struct rte_mbuf, buf_iova));
1661 * Get mbuf's, olflags, iova, pktlen, dataoff
1662 * dataoff_iovaX.D[0] = iova,
1663 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1664 * len_olflagsX.D[0] = ol_flags,
1665 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1667 dataoff_iova0 = vld1q_u64(mbuf0);
1668 len_olflags0 = vld1q_u64(mbuf0 + 2);
1669 dataoff_iova1 = vld1q_u64(mbuf1);
1670 len_olflags1 = vld1q_u64(mbuf1 + 2);
1671 dataoff_iova2 = vld1q_u64(mbuf2);
1672 len_olflags2 = vld1q_u64(mbuf2 + 2);
1673 dataoff_iova3 = vld1q_u64(mbuf3);
1674 len_olflags3 = vld1q_u64(mbuf3 + 2);
1676 /* Move mbufs to point pool */
1677 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1678 offsetof(struct rte_mbuf, pool) -
1679 offsetof(struct rte_mbuf, buf_iova));
1680 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1681 offsetof(struct rte_mbuf, pool) -
1682 offsetof(struct rte_mbuf, buf_iova));
1683 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1684 offsetof(struct rte_mbuf, pool) -
1685 offsetof(struct rte_mbuf, buf_iova));
1686 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1687 offsetof(struct rte_mbuf, pool) -
1688 offsetof(struct rte_mbuf, buf_iova));
1690 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1691 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1692 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1694 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1695 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1698 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1699 : [a] "+w"(senddesc01_w1)
1700 : [in] "r"(mbuf0 + 2)
1703 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1704 : [a] "+w"(senddesc01_w1)
1705 : [in] "r"(mbuf1 + 2)
1708 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1709 : [b] "+w"(senddesc23_w1)
1710 : [in] "r"(mbuf2 + 2)
1713 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1714 : [b] "+w"(senddesc23_w1)
1715 : [in] "r"(mbuf3 + 2)
1718 /* Get pool pointer alone */
1719 mbuf0 = (uint64_t *)*mbuf0;
1720 mbuf1 = (uint64_t *)*mbuf1;
1721 mbuf2 = (uint64_t *)*mbuf2;
1722 mbuf3 = (uint64_t *)*mbuf3;
1724 /* Get pool pointer alone */
1725 mbuf0 = (uint64_t *)*mbuf0;
1726 mbuf1 = (uint64_t *)*mbuf1;
1727 mbuf2 = (uint64_t *)*mbuf2;
1728 mbuf3 = (uint64_t *)*mbuf3;
1731 const uint8x16_t shuf_mask2 = {
1732 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1733 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1735 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1736 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1738 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1739 const uint64x2_t and_mask0 = {
1744 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1745 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1746 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1747 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1750 * Pick only 16 bits of pktlen preset at bits 63:32
1751 * and place them at bits 15:0.
1753 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1754 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1756 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1757 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1758 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1760 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1761 * pktlen at 15:0 position.
1763 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1764 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1765 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1766 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1768 /* Move mbuf to point to pool_id. */
1769 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1770 offsetof(struct rte_mempool, pool_id));
1771 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1772 offsetof(struct rte_mempool, pool_id));
1773 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1774 offsetof(struct rte_mempool, pool_id));
1775 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1776 offsetof(struct rte_mempool, pool_id));
1778 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1779 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1781 * Lookup table to translate ol_flags to
1782 * il3/il4 types. But we still use ol3/ol4 types in
1783 * senddesc_w1 as only one header processing is enabled.
1785 const uint8x16_t tbl = {
1786 /* [0-15] = il4type:il3type */
1787 0x04, /* none (IPv6 assumed) */
1788 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1789 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1790 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1791 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1792 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1793 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1794 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1795 0x02, /* RTE_MBUF_F_TX_IPV4 */
1796 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1797 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1798 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1799 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1800 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1801 * RTE_MBUF_F_TX_TCP_CKSUM
1803 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1804 * RTE_MBUF_F_TX_SCTP_CKSUM
1806 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1807 * RTE_MBUF_F_TX_UDP_CKSUM
1811 /* Extract olflags to translate to iltypes */
1812 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1813 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1816 * E(47):L3_LEN(9):L2_LEN(7+z)
1817 * E(47):L3_LEN(9):L2_LEN(7+z)
1819 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1820 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1822 /* Move OLFLAGS bits 55:52 to 51:48
1823 * with zeros preprended on the byte and rest
1826 xtmp128 = vshrq_n_u8(xtmp128, 4);
1827 ytmp128 = vshrq_n_u8(ytmp128, 4);
1829 * E(48):L3_LEN(8):L2_LEN(z+7)
1830 * E(48):L3_LEN(8):L2_LEN(z+7)
1832 const int8x16_t tshft3 = {
1833 -1, 0, 8, 8, 8, 8, 8, 8,
1834 -1, 0, 8, 8, 8, 8, 8, 8,
1837 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1838 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1841 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1842 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1844 /* Pick only relevant fields i.e Bit 48:55 of iltype
1845 * and place it in ol3/ol4type of senddesc_w1
1847 const uint8x16_t shuf_mask0 = {
1848 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1849 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1852 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1853 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1855 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1856 * a [E(32):E(16):OL3(8):OL2(8)]
1858 * a [E(32):E(16):(OL3+OL2):OL2]
1859 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1861 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1862 vshlq_n_u16(senddesc01_w1, 8));
1863 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1864 vshlq_n_u16(senddesc23_w1, 8));
1866 /* Move ltypes to senddesc*_w1 */
1867 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1868 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1869 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1870 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1872 * Lookup table to translate ol_flags to
1876 const uint8x16_t tbl = {
1877 /* [0-15] = ol4type:ol3type */
1879 0x03, /* OUTER_IP_CKSUM */
1880 0x02, /* OUTER_IPV4 */
1881 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1882 0x04, /* OUTER_IPV6 */
1883 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1884 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1885 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1888 0x00, /* OUTER_UDP_CKSUM */
1889 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1890 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1891 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1894 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1895 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1898 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1901 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1902 * OUTER_IPV4 | OUTER_IP_CKSUM
1906 /* Extract olflags to translate to iltypes */
1907 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1908 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1911 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1912 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1914 const uint8x16_t shuf_mask5 = {
1915 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1916 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1918 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1919 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1921 /* Extract outer ol flags only */
1922 const uint64x2_t o_cksum_mask = {
1927 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1928 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1930 /* Extract OUTER_UDP_CKSUM bit 41 and
1934 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1935 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1937 /* Shift oltype by 2 to start nibble from BIT(56)
1938 * instead of BIT(58)
1940 xtmp128 = vshrq_n_u8(xtmp128, 2);
1941 ytmp128 = vshrq_n_u8(ytmp128, 2);
1943 * E(48):L3_LEN(8):L2_LEN(z+7)
1944 * E(48):L3_LEN(8):L2_LEN(z+7)
1946 const int8x16_t tshft3 = {
1947 -1, 0, 8, 8, 8, 8, 8, 8,
1948 -1, 0, 8, 8, 8, 8, 8, 8,
1951 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1952 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1955 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1956 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1958 /* Pick only relevant fields i.e Bit 56:63 of oltype
1959 * and place it in ol3/ol4type of senddesc_w1
1961 const uint8x16_t shuf_mask0 = {
1962 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1963 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1966 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1967 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1969 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1970 * a [E(32):E(16):OL3(8):OL2(8)]
1972 * a [E(32):E(16):(OL3+OL2):OL2]
1973 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1975 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1976 vshlq_n_u16(senddesc01_w1, 8));
1977 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1978 vshlq_n_u16(senddesc23_w1, 8));
1980 /* Move ltypes to senddesc*_w1 */
1981 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1982 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1983 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1984 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1985 /* Lookup table to translate ol_flags to
1986 * ol4type, ol3type, il4type, il3type of senddesc_w1
1988 const uint8x16x2_t tbl = {{
1990 /* [0-15] = il4type:il3type */
1991 0x04, /* none (IPv6) */
1992 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1993 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1994 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1995 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1996 0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1997 * RTE_MBUF_F_TX_TCP_CKSUM
1999 0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2000 * RTE_MBUF_F_TX_SCTP_CKSUM
2002 0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2003 * RTE_MBUF_F_TX_UDP_CKSUM
2005 0x02, /* RTE_MBUF_F_TX_IPV4 */
2006 0x12, /* RTE_MBUF_F_TX_IPV4 |
2007 * RTE_MBUF_F_TX_TCP_CKSUM
2009 0x22, /* RTE_MBUF_F_TX_IPV4 |
2010 * RTE_MBUF_F_TX_SCTP_CKSUM
2012 0x32, /* RTE_MBUF_F_TX_IPV4 |
2013 * RTE_MBUF_F_TX_UDP_CKSUM
2015 0x03, /* RTE_MBUF_F_TX_IPV4 |
2016 * RTE_MBUF_F_TX_IP_CKSUM
2018 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2019 * RTE_MBUF_F_TX_TCP_CKSUM
2021 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2022 * RTE_MBUF_F_TX_SCTP_CKSUM
2024 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2025 * RTE_MBUF_F_TX_UDP_CKSUM
2030 /* [16-31] = ol4type:ol3type */
2032 0x03, /* OUTER_IP_CKSUM */
2033 0x02, /* OUTER_IPV4 */
2034 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2035 0x04, /* OUTER_IPV6 */
2036 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2037 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2038 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2041 0x00, /* OUTER_UDP_CKSUM */
2042 0x33, /* OUTER_UDP_CKSUM |
2045 0x32, /* OUTER_UDP_CKSUM |
2048 0x33, /* OUTER_UDP_CKSUM |
2049 * OUTER_IPV4 | OUTER_IP_CKSUM
2051 0x34, /* OUTER_UDP_CKSUM |
2054 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2057 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2060 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2061 * OUTER_IPV4 | OUTER_IP_CKSUM
2066 /* Extract olflags to translate to oltype & iltype */
2067 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2068 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2071 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2072 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2074 const uint32x4_t tshft_4 = {
2080 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2081 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2084 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2085 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2087 const uint8x16_t shuf_mask5 = {
2088 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2089 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2091 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2092 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2094 /* Extract outer and inner header ol_flags */
2095 const uint64x2_t oi_cksum_mask = {
2100 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2101 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2103 /* Extract OUTER_UDP_CKSUM bit 41 and
2107 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2108 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2110 /* Shift right oltype by 2 and iltype by 4
2111 * to start oltype nibble from BIT(58)
2112 * instead of BIT(56) and iltype nibble from BIT(48)
2113 * instead of BIT(52).
2115 const int8x16_t tshft5 = {
2116 8, 8, 8, 8, 8, 8, -4, -2,
2117 8, 8, 8, 8, 8, 8, -4, -2,
2120 xtmp128 = vshlq_u8(xtmp128, tshft5);
2121 ytmp128 = vshlq_u8(ytmp128, tshft5);
2123 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2124 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2126 const int8x16_t tshft3 = {
2127 -1, 0, -1, 0, 0, 0, 0, 0,
2128 -1, 0, -1, 0, 0, 0, 0, 0,
2131 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2132 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2134 /* Mark Bit(4) of oltype */
2135 const uint64x2_t oi_cksum_mask2 = {
2140 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2141 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2144 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2145 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2147 /* Pick only relevant fields i.e Bit 48:55 of iltype and
2148 * Bit 56:63 of oltype and place it in corresponding
2149 * place in senddesc_w1.
2151 const uint8x16_t shuf_mask0 = {
2152 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2153 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2156 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2157 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2159 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2160 * l3len, l2len, ol3len, ol2len.
2161 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2163 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2165 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2166 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2168 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2169 vshlq_n_u32(senddesc01_w1, 8));
2170 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2171 vshlq_n_u32(senddesc23_w1, 8));
2173 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2174 senddesc01_w1 = vaddq_u8(
2175 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2176 senddesc23_w1 = vaddq_u8(
2177 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2179 /* Move ltypes to senddesc*_w1 */
2180 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2181 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN,
						RTE_MBUF_F_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ,
						RTE_MBUF_F_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};

			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};
			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
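			/* vtstq_u64 yields an all-ones lane when the packet
			 * has the tested ol_flag; AND-ing with mlv/mlq turns
			 * that into the VLAN1/VLAN0 insert-enable bits
			 * (BIT(49)/BIT(48)) of the send ext header.
			 */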
			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
						RTE_MBUF_F_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate
			 * inverted mask as we need not make any changes to
			 * the default cmd value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));
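			/* Net effect: packets that request timestamping keep
			 * the default SEND_MEM command, while for all others
			 * the inverted mask bumps the write address by 8 and
			 * switches the alg to SUB, steering the DMA to a
			 * scratch word instead of the live timestamp slot.
			 */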
			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}
		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			const uint64_t lso_fmt = txq->lso_tun_fmt;
			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
			uint64_t sd_w1[NIX_DESCS_PER_LOOP];

			/* Extract SD W1 as we need to set L4 types. */
			vst1q_u64(sd_w1, senddesc01_w1);
			vst1q_u64(sd_w1 + 2, senddesc23_w1);

			/* Extract SX W0 as we need to set LSO fields. */
			vst1q_u64(sx_w0, sendext01_w0);
			vst1q_u64(sx_w0 + 2, sendext23_w0);

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Prepare individual mbufs. */
			cn10k_nix_prepare_tso(tx_pkts[0],
				(union nix_send_hdr_w1_u *)&sd_w1[0],
				(union nix_send_ext_w0_u *)&sx_w0[0],
				vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[1],
				(union nix_send_hdr_w1_u *)&sd_w1[1],
				(union nix_send_ext_w0_u *)&sx_w0[1],
				vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[2],
				(union nix_send_hdr_w1_u *)&sd_w1[2],
				(union nix_send_ext_w0_u *)&sx_w0[2],
				vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[3],
				(union nix_send_hdr_w1_u *)&sd_w1[3],
				(union nix_send_ext_w0_u *)&sx_w0[3],
				vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);

			senddesc01_w1 = vld1q_u64(sd_w1);
			senddesc23_w1 = vld1q_u64(sd_w1 + 2);

			sendext01_w0 = vld1q_u64(sx_w0);
			sendext23_w0 = vld1q_u64(sx_w0 + 2);
		}
		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
		    !(flags & NIX_TX_MULTI_SEG_F) &&
		    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);

			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
			   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}
		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
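		/* vzip1q/vzip2q interleave the W0/W1 vectors back into
		 * per-packet layout: cmd0[i] is the 16B send header of
		 * packet i and cmd1[i] its SG descriptor.
		 */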
		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}
		if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
			const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
						RTE_MBUF_F_TX_SEC_OFFLOAD};
			uintptr_t next;
			uint8_t dw;

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			xtmp128 = vtstq_u64(olf, xtmp128);
			ytmp128 = vtstq_u64(olf, ytmp128);
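			/* Route each packet: lanes with SEC_OFFLOAD set go
			 * through CPT instruction prep (written to the CPT
			 * LMT area), the rest just reserve a slot in the
			 * regular NIX LMT lines.
			 */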
			/* Process mbuf0 */
			dw = cn10k_nix_tx_dwords(flags, segdw[0]);
			if (vgetq_lane_u64(xtmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
						       &cmd1[0], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf0 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
					     cmd0[0], cmd1[0], cmd2[0], cmd3[0],
					     flags);

			/* Process mbuf1 */
			dw = cn10k_nix_tx_dwords(flags, segdw[1]);
			if (vgetq_lane_u64(xtmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
						       &cmd1[1], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf1 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
					     cmd0[1], cmd1[1], cmd2[1], cmd3[1],
					     flags);

			/* Process mbuf2 */
			dw = cn10k_nix_tx_dwords(flags, segdw[2]);
			if (vgetq_lane_u64(ytmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
						       &cmd1[2], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf2 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
					     cmd0[2], cmd1[2], cmd2[2], cmd3[2],
					     flags);

			/* Process mbuf3 */
			dw = cn10k_nix_tx_dwords(flags, segdw[3]);
			if (vgetq_lane_u64(ytmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
						       &cmd1[3], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf3 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
					     cmd0[3], cmd1[3], cmd2[3], cmd3[3],
					     flags);
		} else if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			segdw[4] = 8;
			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
							   cmd2, cmd3, segdw,
							   (uint64_t *)
							   LMT_OFF(laddr, lnum,
								   0),
							   &wd.data128, &shift,
							   flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
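			/* Pack descriptors into 128B LMT lines: two packets
			 * per line at 8 dwords each (with timestamp), two per
			 * line at 6 dwords, or four per line at 4 dwords in
			 * the plain case below.
			 */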
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}
		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Roundup lnum to last line if it is partial */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		lnum = lnum + !!loff;
		wd.data128 = wd.data128 |
			     (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
	}

	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
		wd.data[0] >>= 16;

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	left -= burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
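	/* A single LMTST can flush at most 16 LMT lines, so more than 16
	 * lines are split across two STEOR operations: bits 12:15 of the
	 * submit word carry the line count minus one and the low bits the
	 * LMT ID.
	 */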
	/* Trigger LMTST */
	if (lnum > 16) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[1] <<= 16;

		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
							 scalar, cmd, base,
							 flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
						    cmd, base, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	RTE_SET_USED(base);
	return 0;
}
#endif
#define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F NIX_TX_OFFLOAD_TSO_F
#define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F NIX_TX_OFFLOAD_SECURITY_F

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
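/* Each T(name, sz, flags) entry names one specialized Tx function; sz is the
 * number of 64-bit words reserved on stack for the fixed send command (4 for
 * hdr+sg, 6 when the send ext header is needed, 8 when a SEND_MEM word is
 * also added for timestamping).
 */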
#define NIX_TX_FASTPATH_MODES_0_15 \
	T(no_offload, 4, NIX_TX_OFFLOAD_NONE) \
	T(l3l4csum, 4, L3L4CSUM_F) \
	T(ol3ol4csum, 4, OL3OL4CSUM_F) \
	T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F) \
	T(vlan, 6, VLAN_F) \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F) \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F) \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(noff, 4, NOFF_F) \
	T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F) \
	T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F) \
	T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(noff_vlan, 6, NOFF_F | VLAN_F) \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_16_31 \
	T(tso, 6, TSO_F) \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F) \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F) \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_vlan, 6, TSO_F | VLAN_F) \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F) \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6, \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_noff, 6, TSO_F | NOFF_F) \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F) \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(tso_noff_ol3ol4csum_l3l4csum, 6, \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F) \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_32_47 \
	T(ts, 8, TSP_F) \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F) \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F) \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_vlan, 8, TSP_F | VLAN_F) \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F) \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_noff, 8, TSP_F | NOFF_F) \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F) \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F) \
	T(ts_noff_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F) \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_48_63 \
	T(ts_tso, 8, TSP_F | TSO_F) \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F) \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F) \
	T(ts_tso_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F) \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F) \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F) \
	T(ts_tso_noff_vlan_l3l4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(ts_tso_noff_vlan_ol3ol4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_64_79 \
	T(sec, 4, T_SEC_F) \
	T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F) \
	T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F) \
	T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_vlan, 6, T_SEC_F | VLAN_F) \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F) \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_noff, 4, T_SEC_F | NOFF_F) \
	T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F) \
	T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_noff_ol3ol4csum_l3l4csum, 4, \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F) \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_noff_vlan_ol3ol4csum, 6, \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_80_95 \
	T(sec_tso, 6, T_SEC_F | TSO_F) \
	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F) \
	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F) \
	T(sec_tso_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F) \
	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F) \
	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F) \
	T(sec_tso_noff_vlan_l3l4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_tso_noff_vlan_ol3ol4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_96_111 \
	T(sec_ts, 8, T_SEC_F | TSP_F) \
	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F) \
	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F) \
	T(sec_ts_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F) \
	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F) \
	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F) \
	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F) \
	T(sec_ts_noff_vlan_l3l4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_noff_vlan_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_112_127 \
	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F) \
	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F) \
	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F) \
	T(sec_ts_tso_vlan_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_tso_vlan_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F) \
	T(sec_ts_tso_noff_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \
	T(sec_ts_tso_noff_vlan_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
	  L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES \
	NIX_TX_FASTPATH_MODES_0_15 \
	NIX_TX_FASTPATH_MODES_16_31 \
	NIX_TX_FASTPATH_MODES_32_47 \
	NIX_TX_FASTPATH_MODES_48_63 \
	NIX_TX_FASTPATH_MODES_64_79 \
	NIX_TX_FASTPATH_MODES_80_95 \
	NIX_TX_FASTPATH_MODES_96_111 \
	NIX_TX_FASTPATH_MODES_112_127
#define T(name, sz, flags) \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
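/* The tables above first expand T() into declarations; the NIX_TX_XMIT*
 * macros below are intended to be used from the corresponding .c files to
 * instantiate the matching definitions around the always-inline workers.
 */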
#define NIX_TX_XMIT(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[sz]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, 0, \
					   flags); \
	}
#define NIX_TX_XMIT_MSEG(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
						0, \
						flags | NIX_TX_MULTI_SEG_F); \
	}
#define NIX_TX_XMIT_VEC(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[sz]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, \
						  cmd, 0, flags); \
	}
#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts_vector( \
			tx_queue, tx_pkts, pkts, cmd, 0, \
			(flags) | NIX_TX_MULTI_SEG_F); \
	}
#endif /* __CN10K_TX_H__ */