/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX	      (NIX_TX_OFFLOAD_SECURITY_F << 1)
/* Flags to control xmit_prepare function.
 * Defined from the MSB downwards to denote that these are not
 * offload flags used to pick the Tx burst function.
 */
#define NIX_TX_VWQE_F	   BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |        \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |               \
	 NIX_TX_OFFLOAD_TSO_F)
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, Update the fc_cache_pkts */            \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqe_per_sqb to express in pkts */     \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
/* Encoded number of segments to number of dwords macro, each value of nb_segs
 * is encoded as 4bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
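
/* For illustration, decoding the magic above: the 4-bit nibble at position
 * nb_segs holds the number of 16B dwords the SG subdescriptor(s) need for
 * that many segments, e.g.
 *   NIX_NB_SEGS_TO_SEGDW(1) = 1 (SG header + 1 IOVA  -> 2 x u64)
 *   NIX_NB_SEGS_TO_SEGDW(3) = 2 (SG header + 3 IOVAs -> 4 x u64)
 *   NIX_NB_SEGS_TO_SEGDW(4) = 3 (2 SG subdescs       -> 6 x u64)
 */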
/* Function to determine no of tx subdesc required in case ext
 * sub desc is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
			     2 :
			     ((flags &
			       (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
				      1 :
				      0);
}
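
/* In other words: TSTAMP needs both SEND_EXT and SEND_MEM subdescriptors
 * (2), VLAN/QinQ and TSO need only SEND_EXT (1), and anything else needs
 * no extension subdescriptor at all.
 */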
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
	if (!(flags & NIX_TX_MULTI_SEG_F))
		return cn10k_nix_tx_ext_subs(flags) + 2;

	/* Already everything is accounted for in segdw */
	return segdw;
}
static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ?
			     ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
			     4;
}
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
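
/* Layout of the STEOR data word built above, as used by the submit paths
 * below: bits [2:0] hold dwords-1 for LMT line 0 (callers fold these into
 * bits [6:4] of the I/O address), bits [11:0] are then overwritten with
 * the LMT ID, bits [16:12] with the number of valid lines minus one, and
 * each 3-bit field from bit 19 upwards holds dwords-1 for lines 1..15.
 */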
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ?
			      (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
			      4);
}
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
	/* We have two CPT instructions per LMTLine */
	const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags, const uint16_t static_sz)
{
	if (static_sz)
		cmd[0] = txq->send_hdr_w0;
	else
		cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
			 ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
	cmd[1] = 0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
			cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
		else
			cmd[2] = NIX_SUBDC_EXT << 60;
		cmd[3] = 0;
		cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	} else {
		cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	}
}
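
/* Resulting 'cmd' layout, e.g. with an extension header and TSTAMP
 * enabled: cmd[0..1] SEND_HDR, cmd[2..3] SEND_EXT, cmd[4..5] SEND_SG
 * template. Without the extension header the SG template sits at
 * cmd[2..3] instead.
 */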
static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
		     uint8_t loff, uint8_t shft)
{
	uint64_t data;
	uintptr_t pa;

	/* Check if there is any CPT instruction to submit */
	if (!lnum && !loff)
		return;

	data = cn10k_cpt_tx_steor_data();
	/* Update lmtline use for partial end line */
	if (loff) {
		data &= ~(0x7ULL << shft);
		/* Update it to half full i.e 64B */
		data |= (0x3UL << shft);
	}

	pa = io_addr | ((data >> 16) & 0x7) << 4;
	data &= ~(0x7ULL << 16);
	/* Update lines - 1 that contain valid data */
	data |= ((uint64_t)(lnum + loff - 1)) << 12;
	data |= (uint64_t)lmt_id;

	/* STEOR */
	roc_lmt_submit_steorl(data, pa);
}
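
/* Note: 'lnum' counts fully used CPT LMT lines and 'loff' flags a
 * half-filled trailing line (two CPT instructions fit per 128B line),
 * hence 'lnum + loff - 1' above is the lines-minus-one value programmed.
 */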
#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
		       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
		       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
		       const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	uint64x2_t cmd01, cmd23;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint8_t l2_len;
	uint16_t tag;
	uint64_t sa;

	sess_priv.u64 = *rte_security_dynfield(m);

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		l2_len = vgetq_lane_u8(*cmd0, 8);
	else
		l2_len = m->l2_len;

	/* Retrieve DPTR */
	dptr = vgetq_lane_u64(*cmd1, 1);
	pkt_len = vgetq_lane_u16(*cmd0, 0);

	/* Calculate dlen adj */
	dlen_adj = pkt_len - l2_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptors. Security is single segment only */
	*cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
	*cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

	/* Get area where NIX descriptor needs to be stored */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed is excluding L2HDR */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] =
		(ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

	/* CPT Word 0 and Word 1 */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* CPT_RES_S is 16B above NIXTX */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3 */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}

	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Move to our line */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to lmt line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Move to next line for every other CPT inst */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}
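
/* The 64B CPT instruction written above is laid out as: words 0-1
 * (result/NIXTX address and dword count), words 2-3 (event tag and mbuf
 * pointer), then 32B of microcode command. Two such instructions share
 * one 128B LMT line, which is why *loff toggles and *lnum/*shft advance
 * only on every other call.
 */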
static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	struct nix_send_hdr_s *send_hdr;
	uint64x2_t cmd01, cmd23;
	union nix_send_sg_s *sg;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint8_t l2_len;
	uint16_t tag;
	uint64_t sa;

	/* Move to our line from base */
	sess_priv.u64 = *rte_security_dynfield(m);
	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR)
		sg = (union nix_send_sg_s *)&cmd[4];
	else
		sg = (union nix_send_sg_s *)&cmd[2];

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		l2_len = cmd[1] & 0xFF;
	else
		l2_len = m->l2_len;

	/* Retrieve DPTR */
	dptr = *(uint64_t *)(sg + 1);
	pkt_len = send_hdr->w0.total;

	/* Calculate dlen adj */
	dlen_adj = pkt_len - l2_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptors. Security is single segment only */
	send_hdr->w0.total = pkt_len + dlen_adj;
	sg->seg1_size = pkt_len + dlen_adj;

	/* Get area where NIX descriptor needs to be stored */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed is excluding L2HDR */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] =
		(ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

	/* CPT Word 0 and Word 1. Assume no multi-seg support */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* CPT_RES_S is 16B above NIXTX */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3 */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}

	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Move to our line */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to lmt line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Move to next line for every other CPT inst */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}
#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	RTE_SET_USED(m);
	RTE_SET_USED(cmd);
	RTE_SET_USED(nixtx_addr);
	RTE_SET_USED(lbase);
	RTE_SET_USED(lnum);
	RTE_SET_USED(loff);
	RTE_SET_USED(shft);
	RTE_SET_USED(sa_base);
	RTE_SET_USED(flags);
}
#endif
static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       RTE_MBUF_F_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
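
/* The IP (and outer UDP) length fields are pre-reduced by 'paylen' here
 * so that when LSO hardware replicates the headers for every output
 * segment and adds the per-segment payload size, the fields end up
 * correct on the wire; lso_sb above is the replicated header span.
 */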
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
		       const uint64_t lso_tun_fmt, bool *sec)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F))
		send_hdr->w0.total = m->data_len;
	else
		send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

		/* In case of no tunnel header use only
		 * shift IL3/IL4 fields a bit to use
		 * OL3/OL4 for header checksum
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Outer L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = send_hdr->w0.total;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent mbuf
			 *	    is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	} else {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		/* NOFF is handled later for multi-seg */
	}

	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		*sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}
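
/* Worked example of the w1 collapse in the dual-checksum path above: with
 * no outer header (ol3type == 0, mask == 1) the inner types in the upper
 * 32 bits shift down 8 bits into the outer type slots and the inner
 * pointers in the lower 32 bits shift down 16 bits into the outer pointer
 * slots, so a single-header packet always programs OLXPTR/OLXTYPE.
 */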
static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
			   const uint16_t flags)
{
	struct nix_send_ext_s *send_hdr_ext;
	union nix_send_sg_s *sg;

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be in stack. Intent is
	 * 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;

		sg = (union nix_send_sg_s *)(cmd + 4);
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}
static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp =
			!(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		/* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
		 * should not be recorded, hence changing the alg type to
		 * NIX_SENDMEMALG_SUB and also changing send mem addr field to
		 * next 8 bytes as it corrupts the actual Tx tstamp registered
		 * address.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg =
			NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
		send_mem->w1 =
			(rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
	}
}
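
/* i.e. when the packet did not request a timestamp, alg becomes
 * NIX_SENDMEMALG_SETTSTMP + 8 (the SUB alg) and w1 points one word past
 * ts_mem, so hardware writes a scratch slot instead of the live
 * timestamp word.
 */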
static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];

	/* Start from second segment, first segment is already there */
	i = 1;
	sg_u = sg->u;
	nb_segs = m->nb_segs - 1;
	m_next = m->next;
	slist = &cmd[3 + off + 1];

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif

	m = m_next;
	if (!m)
		goto done;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

done:
	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
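
/* Worked example: a 4-segment chain with an extension header (off = 2)
 * emits SEND_HDR (1 dw) + SEND_EXT (1 dw) + SG covering 3 segs (2 dw) +
 * a second SG covering 1 seg (1 dw), i.e. segdw = 5 before the optional
 * TSTAMP dword is added.
 */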
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
		    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uint8_t lnum, c_lnum, c_shft, c_loff;
	uintptr_t pa, lbase = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uintptr_t c_lbase = lbase;
	rte_iova_t c_io_addr;
	uint64_t lso_tun_fmt;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	uint64_t data;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}
	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	burst = left > 32 ? 32 : left;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction and get nixtx addr */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX desc to LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
					      4, flags);
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
			lnum++;
	}

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent to CPT */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}
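
/* Up to 32 packets go out per pass, one LMT line each; a burst larger
 * than 16 is split into two STEORLs, the first covering lines
 * lmt_id..lmt_id+15 (15ULL << 12 is lines-1) and the second covering the
 * remainder starting at lmt_id + 16.
 */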
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
			 struct rte_mbuf **tx_pkts, uint16_t pkts,
			 uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lbase = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint8_t lnum, c_lnum, c_loff;
	uintptr_t c_lbase = lbase;
	uint64_t data0, data1;
	rte_iova_t c_io_addr;
	uint64_t lso_tun_fmt;
	uint8_t shft, c_shft;
	__uint128_t data128;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}
	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction and get nixtx addr */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX desc to LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
					      segdw, flags);
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
			lnum++;
			data128 |= (((__uint128_t)(segdw - 1)) << shft);
			shft += 3;
		}
	}

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent to CPT */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;

	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}
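
/* data128 accumulates one 3-bit dwords-1 field per LMT line starting at
 * bit 16; its low and high 64-bit halves feed STEOR0 (lines 0-15) and
 * STEOR1 (lines 16-31). data0 >>= 16 plus the <<= 16 after masking
 * re-align the per-line size fields to bit 19, as the LMTST data format
 * expects.
 */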
#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		      const uint64_t flags, const uint64_t lso_tun_fmt)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
			0x1;
		uint8_t shift = is_udp_tun ? 32 : 0;

		shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
		shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format = (lso_tun_fmt >> shift);
	}
}
static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
				union nix_send_hdr_w0_u *sh,
				union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif

	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
}
static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			   uint64x2_t *cmd1, const uint8_t segdw,
			   const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;

	if (m->nb_segs == 1) {
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
		return;
	}

	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	sh.sizem1 = segdw - 1;
	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}
#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
			       uint64x2_t *cmd1, uint64x2_t *cmd2,
			       uint64x2_t *cmd3, uint8_t *segdw,
			       uint64_t *lmt_addr, __uint128_t *data128,
			       uint8_t *shift, const uint16_t flags)
{
	uint8_t j, off, lmt_used;

	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
		/* No segments in 4 consecutive packets. */
		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
			vst1q_u64(lmt_addr, cmd0[0]);
			vst1q_u64(lmt_addr + 2, cmd1[0]);
			vst1q_u64(lmt_addr + 4, cmd0[1]);
			vst1q_u64(lmt_addr + 6, cmd1[1]);
			vst1q_u64(lmt_addr + 8, cmd0[2]);
			vst1q_u64(lmt_addr + 10, cmd1[2]);
			vst1q_u64(lmt_addr + 12, cmd0[3]);
			vst1q_u64(lmt_addr + 14, cmd1[3]);

			*data128 |= ((__uint128_t)7) << *shift;
			*shift += 3;

			return 1;
		}
	}

	lmt_used = 0;
	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
		/* Fit consecutive packets in same LMTLINE. */
		if ((segdw[j] + segdw[j + 1]) <= 8) {
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				/* TSTAMP takes 4 each, no segs. */
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				vst1q_u64(lmt_addr + 6, cmd3[j]);

				vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
				vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				/* EXT header take 3 each, space for 2 segs.*/
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				off = segdw[j] - 3;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 12 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
				off = segdw[j] - 2;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 8 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
			}
			*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
				    << *shift;
			*shift += 3;
			j += 2;
		} else {
			if ((flags & NIX_TX_NEED_EXT_HDR) &&
			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				off = segdw[j] - 4;
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
			}
			*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
			*shift += 3;
			j++;
		}
		lmt_used++;
		lmt_addr += 16;
	}

	return lmt_used;
}
static __rte_always_inline void
cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
		   uint8_t *shift, __uint128_t *data128, uintptr_t *next)
{
	/* Go to next line if we are out of space */
	if ((*loff + (dw << 4)) > 128) {
		*data128 = *data128 |
			   (((__uint128_t)((*loff >> 4) - 1)) << *shift);
		*shift = *shift + 3;
		*loff = 0;
		(*lnum)++;
	}

	*next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
	*loff = *loff + (dw << 4);
}
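
/* e.g. a 6-dword (96B) descriptor arriving with *loff == 64 spills over:
 * the finished line's dwords-1 ((64 >> 4) - 1 = 3) is recorded in
 * data128 and the descriptor starts at offset 0 of the next LMT line.
 */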
static __rte_always_inline void
cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
		     uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
		     uint64x2_t cmd3, const uint16_t flags)
{
	uint8_t off;

	/* Handle no fast free when security is enabled without mseg */
	if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
	    (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
	    !(flags & NIX_TX_MULTI_SEG_F)) {
		union nix_send_sg_s sg;

		sg.u = vgetq_lane_u64(cmd1, 0);
		sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
		cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1, 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
						  0);
#endif
	}
	if (flags & NIX_TX_MULTI_SEG_F) {
		if ((flags & NIX_TX_NEED_EXT_HDR) &&
		    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
			off = segdw - 4;
			off <<= 4;
			vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
		} else {
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
		}
	} else if (flags & NIX_TX_NEED_EXT_HDR) {
		/* Store the prepared send desc to LMT lines */
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
			vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
		} else {
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
		}
	} else {
		/* Store the prepared send desc to LMT lines */
		vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
		vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
	}
}
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
			   struct rte_mbuf **tx_pkts, uint16_t pkts,
			   uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn10k_eth_txq *txq = tx_queue;
	rte_iova_t io_addr = txq->io_addr;
	uintptr_t laddr = txq->lmt_base;
	uint8_t c_lnum, c_shft, c_loff;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uintptr_t c_laddr = laddr;
	uint8_t lnum, shift, loff;
	rte_iova_t c_io_addr;
	uint64_t sa_base;
	union wdata {
		__uint128_t data128;
		uint64_t data[2];
	} wd;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	} else {
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
	}

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		for (i = 0; i < pkts; i++)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}
	if (!(flags & NIX_TX_VWQE_F)) {
		senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	} else {
		uint64_t w0 =
			(txq->send_hdr_w0 & 0xFFFFF00000000000) |
			((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);

		senddesc01_w0 = vdupq_n_u64(w0);
	}
	senddesc23_w0 = senddesc01_w0;

	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
	sgdesc23_w0 = sgdesc01_w0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
						   BIT_ULL(15));
			sendmem01_w0 =
				vdupq_n_u64((NIX_SUBDC_MEM << 60) |
					    (NIX_SENDMEMALG_SETTSTMP << 56));
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
			sendmem23_w1 = sendmem01_w1;
		} else {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
		}
		sendext23_w0 = sendext01_w0;

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
			sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		else
			sendext01_w1 = vdupq_n_u64(0);
		sendext23_w1 = sendext01_w1;
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	/* Number of packets to prepare depends on offloads enabled. */
	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
			      cn10k_nix_pkts_per_vec_brst(flags) :
			      left;
	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
		wd.data128 = 0;
		shift = 16;
	}
	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		loff = 0;
		c_loff = 0;
		c_lnum = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
			burst = i;
			break;
		}

		if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
				struct rte_mbuf *m = tx_pkts[j];

				/* Get dwords based on nb_segs. */
				segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
				/* Add dwords based on offloads. */
				segdw[j] += 1 + /* SEND HDR */
					    !!(flags & NIX_TX_NEED_EXT_HDR) +
					    !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
			}

			/* Check if there are enough LMTLINES for this loop */
			if (lnum + 4 > 32) {
				uint8_t ldwords_con = 0, lneeded = 0;

				for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
					ldwords_con += segdw[j];
					if (ldwords_con > 8) {
						lneeded += 1;
						ldwords_con = segdw[j];
					}
				}
				lneeded += 1;
				if (lnum + lneeded > 32) {
					burst = i;
					break;
				}
			}
		}
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB*/
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			/* Clear the LSO enable bit. */
			sendext01_w0 = vbicq_u64(sendext01_w0,
						 vdupq_n_u64(BIT_ULL(14)));
			sendext23_w0 = sendext01_w0;
		}
		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbuf's, olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbufs to point pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}
		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));
		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
				0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
				0x02, /* RTE_MBUF_F_TX_IPV4 */
				0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
				0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
				0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_TCP_CKSUM
				       */
				0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_SCTP_CKSUM
				       */
				0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and rest
			 * of the bits are ignored.
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);

			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
					0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
					0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
					0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
					0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x02, /* RTE_MBUF_F_TX_IPV4 */
					0x12, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x22, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x32, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x03, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_IP_CKSUM
					       */
					0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype and
			 * Bit 56:63 of oltype and place it in corresponding
			 * place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}
		xmask01 = vdupq_n_u64(0);
		xmask23 = vdupq_n_u64(0);
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01) : [in] "r"(mbuf0) : "memory");
		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01) : [in] "r"(mbuf1) : "memory");
		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23) : [in] "r"(mbuf2) : "memory");
		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23) : [in] "r"(mbuf3) : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
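
		/* Each LD1 {Vn.H}[lane] above pulls one 16-bit value per
		 * packet from the address held in mbuf0..mbuf3 (set up
		 * earlier in the loop); the shift by 20 then places it at
		 * bits 20+ of SEND_HDR_W0, the aura field, so mbufs freed by
		 * hardware are returned to the right mempool.
		 */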
		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QinQ. */
			const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};

			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}
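
		/* vtstq_u64() returns all-ones in each 64-bit lane whose
		 * ol_flags contain the tested flag, so ANDing with mlv/mlq
		 * reduces that to BIT(49)/BIT(48) per packet. Scalar
		 * equivalent for one packet:
		 *
		 *   w1 |= (ol_flags & RTE_MBUF_F_TX_VLAN) ? BIT_ULL(49) : 0;
		 *   w1 |= (ol_flags & RTE_MBUF_F_TX_QINQ) ? BIT_ULL(48) : 0;
		 */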
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
						RTE_MBUF_F_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate an
			 * inverted mask as we need not make any changes to
			 * the default cmd value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));

			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}
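
		/* Design note: when the TSTAMP offload is compiled in, the
		 * SEND_MEM word is always emitted, so the per-packet choice
		 * is made data-driven rather than branchy. For lanes that did
		 * not request a timestamp, the inverted mask bumps the
		 * SEND_MEM write address by 8 bytes (past the slot the driver
		 * actually reads) and flips the algorithm to SUB, turning it
		 * into a harmless no-op while keeping every descriptor the
		 * same size.
		 */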
		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			const uint64_t lso_fmt = txq->lso_tun_fmt;
			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
			uint64_t sd_w1[NIX_DESCS_PER_LOOP];

			/* Extract SD W1 as we need to set L4 types. */
			vst1q_u64(sd_w1, senddesc01_w1);
			vst1q_u64(sd_w1 + 2, senddesc23_w1);

			/* Extract SX W0 as we need to set LSO fields. */
			vst1q_u64(sx_w0, sendext01_w0);
			vst1q_u64(sx_w0 + 2, sendext23_w0);

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Prepare individual mbufs. */
			cn10k_nix_prepare_tso(tx_pkts[0],
				(union nix_send_hdr_w1_u *)&sd_w1[0],
				(union nix_send_ext_w0_u *)&sx_w0[0],
				vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[1],
				(union nix_send_hdr_w1_u *)&sd_w1[1],
				(union nix_send_ext_w0_u *)&sx_w0[1],
				vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[2],
				(union nix_send_hdr_w1_u *)&sd_w1[2],
				(union nix_send_ext_w0_u *)&sx_w0[2],
				vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[3],
				(union nix_send_hdr_w1_u *)&sd_w1[3],
				(union nix_send_ext_w0_u *)&sx_w0[3],
				vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);

			senddesc01_w1 = vld1q_u64(sd_w1);
			senddesc23_w1 = vld1q_u64(sd_w1 + 2);

			sendext01_w0 = vld1q_u64(sx_w0);
			sendext23_w0 = vld1q_u64(sx_w0 + 2);
		}
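
		/* TSO setup depends on per-packet header contents, which does
		 * not vectorize cleanly; the lanes are therefore spilled to
		 * plain arrays, patched one packet at a time, and reloaded
		 * into the vector registers.
		 */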
		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
		    !(flags & NIX_TX_MULTI_SEG_F) &&
		    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = vdupq_n_u64(0);

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);

			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
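
			/* The 0x80000 lanes set above correspond to bit 19 of
			 * SEND_HDR_W0, the don't-free (DF) flag: when
			 * cnxk_nix_prefree_seg() reports that the mbuf is
			 * still referenced elsewhere, NIX is told not to
			 * return it to its pool after transmit.
			 */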
		} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
			   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}
		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}
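
		/* Throughout the loop, w0 of all four packets lives in one
		 * vector pair and w1 in another; vzip1q_u64/vzip2q_u64
		 * transpose them back into per-packet {w0, w1} pairs, one
		 * 16 B descriptor word per mbuf, ready to be copied to the
		 * LMT line.
		 */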
		if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
			const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
						RTE_MBUF_F_TX_SEC_OFFLOAD};
			uintptr_t next;
			uint8_t dw;

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			xtmp128 = vtstq_u64(olf, xtmp128);
			ytmp128 = vtstq_u64(olf, ytmp128);

			/* Process mbuf0 */
			dw = cn10k_nix_tx_dwords(flags, segdw[0]);
			if (vgetq_lane_u64(xtmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
						       &cmd1[0], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);

			cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
					   &shift, &wd.data128, &next);

			/* Store mbuf0 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
					     cmd0[0], cmd1[0], cmd2[0], cmd3[0],
					     flags);

			/* Process mbuf1 */
			dw = cn10k_nix_tx_dwords(flags, segdw[1]);
			if (vgetq_lane_u64(xtmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
						       &cmd1[1], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);

			cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
					   &shift, &wd.data128, &next);

			/* Store mbuf1 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
					     cmd0[1], cmd1[1], cmd2[1], cmd3[1],
					     flags);

			/* Process mbuf2 */
			dw = cn10k_nix_tx_dwords(flags, segdw[2]);
			if (vgetq_lane_u64(ytmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
						       &cmd1[2], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);

			cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
					   &shift, &wd.data128, &next);

			/* Store mbuf2 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
					     cmd0[2], cmd1[2], cmd2[2], cmd3[2],
					     flags);

			/* Process mbuf3 */
			dw = cn10k_nix_tx_dwords(flags, segdw[3]);
			if (vgetq_lane_u64(ytmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
						       &cmd1[3], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);

			cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
					   &shift, &wd.data128, &next);

			/* Store mbuf3 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
					     cmd0[3], cmd1[3], cmd2[3], cmd3[3],
					     flags);
		} else if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			segdw[4] = 8;
			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
					cmd2, cmd3, segdw,
					(uint64_t *)LMT_OFF(laddr, lnum, 0),
					&wd.data128, &shift, flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}
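
		/* The dword budget per packet dictates the store layout:
		 * 8-dword descriptors (with TSTAMP) fill a 128 B LMT line
		 * with two packets, 6-dword ones use 96 B for two packets,
		 * and the plain 4-dword form packs all four packets into a
		 * single line. cmd2 (EXT) always lands between cmd0
		 * (SEND_HDR) and cmd1 (SG), the order in which the hardware
		 * expects sub-descriptors.
		 */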
		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Roundup lnum to last line if it is partial */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		lnum = lnum + !!loff;
		wd.data128 = wd.data128 |
			(((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
	}

	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
		wd.data[0] >>= 16;

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);
	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);

	/* Trigger LMTSTs */
	if (lnum > 16) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;
		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;
		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;
		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;
		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[1] <<= 16;
		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);
		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;
		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;
		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= lmt_id;
		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}
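
	/* Submission encoding sketch: the low 3 bits of wd.data select the
	 * first line's size and are moved into bits [6:4] of the I/O
	 * address; the field starting at bit 12 holds the number of LMT
	 * lines minus one, and the low bits hold the LMT ID. One STEORL
	 * covers at most 16 lines, so bursts larger than that are split into
	 * STEOR0 (15ULL << 12, i.e. 16 lines) and STEOR1 carrying the
	 * remaining lnum - 16 lines.
	 */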
	left -= burst;
	/* Start processing another burst */
	if (left)
		goto again;

	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
							 scalar, cmd, flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
						    scalar, cmd, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
			   struct rte_mbuf **tx_pkts, uint16_t pkts,
			   uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(ws);
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif
#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F
#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
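/* The seven flag bits double as a table index: each T(name, sz, flags) entry
 * sits at position `flags` in its 16-entry block, and `sz` is the number of
 * 64-bit words one packet's Tx command needs in that mode (4 plain, 6 with
 * the EXT header, 8 when the timestamp SEND_MEM word is appended), which
 * sizes the cmd[] array in the functions generated below.
 */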
#define NIX_TX_FASTPATH_MODES_0_15 \
	T(no_offload, 4, NIX_TX_OFFLOAD_NONE) \
	T(l3l4csum, 4, L3L4CSUM_F) \
	T(ol3ol4csum, 4, OL3OL4CSUM_F) \
	T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F) \
	T(vlan, 6, VLAN_F) \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F) \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F) \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(noff, 4, NOFF_F) \
	T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F) \
	T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F) \
	T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(noff_vlan, 6, NOFF_F | VLAN_F) \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_16_31 \
	T(tso, 6, TSO_F) \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F) \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F) \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_vlan, 6, TSO_F | VLAN_F) \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F) \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6, \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_noff, 6, TSO_F | NOFF_F) \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F) \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(tso_noff_ol3ol4csum_l3l4csum, 6, \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F) \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_32_47 \
	T(ts, 8, TSP_F) \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F) \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F) \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_vlan, 8, TSP_F | VLAN_F) \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F) \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_noff, 8, TSP_F | NOFF_F) \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F) \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F) \
	T(ts_noff_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F) \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_48_63 \
	T(ts_tso, 8, TSP_F | TSO_F) \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F) \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F) \
	T(ts_tso_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F) \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F) \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F) \
	T(ts_tso_noff_vlan_l3l4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(ts_tso_noff_vlan_ol3ol4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_64_79 \
	T(sec, 4, T_SEC_F) \
	T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F) \
	T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F) \
	T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_vlan, 6, T_SEC_F | VLAN_F) \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F) \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_noff, 4, T_SEC_F | NOFF_F) \
	T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F) \
	T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_noff_ol3ol4csum_l3l4csum, 4, \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F) \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_noff_vlan_ol3ol4csum, 6, \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_80_95 \
	T(sec_tso, 6, T_SEC_F | TSO_F) \
	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F) \
	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F) \
	T(sec_tso_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F) \
	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F) \
	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F) \
	T(sec_tso_noff_vlan_l3l4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_tso_noff_vlan_ol3ol4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_96_111 \
	T(sec_ts, 8, T_SEC_F | TSP_F) \
	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F) \
	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F) \
	T(sec_ts_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F) \
	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F) \
	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F) \
	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F) \
	T(sec_ts_noff_vlan_l3l4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_noff_vlan_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_112_127 \
	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F) \
	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F) \
	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F) \
	T(sec_ts_tso_vlan_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_tso_vlan_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F) \
	T(sec_ts_tso_noff_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \
	T(sec_ts_tso_noff_vlan_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
	  L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES \
	NIX_TX_FASTPATH_MODES_0_15 \
	NIX_TX_FASTPATH_MODES_16_31 \
	NIX_TX_FASTPATH_MODES_32_47 \
	NIX_TX_FASTPATH_MODES_48_63 \
	NIX_TX_FASTPATH_MODES_64_79 \
	NIX_TX_FASTPATH_MODES_80_95 \
	NIX_TX_FASTPATH_MODES_96_111 \
	NIX_TX_FASTPATH_MODES_112_127
#define T(name, sz, flags) \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
#define NIX_TX_XMIT(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[sz]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
					   flags); \
	}
#define NIX_TX_XMIT_MSEG(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
						cmd, \
						flags | NIX_TX_MULTI_SEG_F); \
	}
#define NIX_TX_XMIT_VEC(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[sz]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts, \
						  pkts, cmd, (flags)); \
	}
#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags) \
	uint16_t __rte_noinline __rte_hot fn( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
	{ \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
		/* For TSO inner checksum is a must */ \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
			return 0; \
		return cn10k_nix_xmit_pkts_vector( \
			tx_queue, NULL, tx_pkts, pkts, cmd, \
			(flags) | NIX_TX_MULTI_SEG_F); \
	}
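
/* Illustrative use (a sketch, not necessarily the exact instantiation): the
 * per-mode Tx functions declared above are expected to be generated in
 * separate compilation units by expanding T() with one of the NIX_TX_XMIT*
 * wrappers, along the lines of:
 *
 *   #define T(name, sz, flags) \
 *           NIX_TX_XMIT(cn10k_nix_xmit_pkts_##name, sz, flags)
 *   NIX_TX_FASTPATH_MODES_0_15
 *   #undef T
 *
 * so each offload-flag combination gets a dedicated, fully specialized
 * transmit routine with a cmd[] array sized by its `sz` column.
 */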
#endif /* __CN10K_TX_H__ */