1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #include <rte_eventdev.h>
11 #define NIX_TX_OFFLOAD_NONE (0)
12 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
13 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
14 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
15 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
16 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
17 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
18 #define NIX_TX_OFFLOAD_SECURITY_F BIT(6)
19 #define NIX_TX_OFFLOAD_MAX (NIX_TX_OFFLOAD_SECURITY_F << 1)
21 /* Flags to control xmit_prepare function.
22 * Defining it from backwards to denote its been
23 * not used as offload flags to pick function
25 #define NIX_TX_VWQE_F BIT(14)
26 #define NIX_TX_MULTI_SEG_F BIT(15)
28 #define NIX_TX_NEED_SEND_HDR_W1 \
29 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
30 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
32 #define NIX_TX_NEED_EXT_HDR \
33 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
36 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
38 /* Cached value is low, Update the fc_cache_pkts */ \
39 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
40 /* Multiply with sqe_per_sqb to express in pkts */ \
41 (txq)->fc_cache_pkts = \
42 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
43 << (txq)->sqes_per_sqb_log2; \
44 /* Check it again for the room */ \
45 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
50 /* Encoded number of segments to number of dwords macro, each value of nb_segs
51 * is encoded as 4bits.
53 #define NIX_SEGDW_MAGIC 0x76654432210ULL
55 #define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
57 /* Function to determine no of tx subdesc required in case ext
58 * sub desc is enabled.
60 static __rte_always_inline int
61 cn10k_nix_tx_ext_subs(const uint16_t flags)
63 return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
66 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
71 static __rte_always_inline uint8_t
72 cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
74 if (!(flags & NIX_TX_MULTI_SEG_F))
75 return cn10k_nix_tx_ext_subs(flags) + 2;
77 /* Already everything is accounted for in segdw */
81 static __rte_always_inline uint8_t
82 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
84 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
85 << ROC_LMT_LINES_PER_CORE_LOG2;
88 static __rte_always_inline uint8_t
89 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
91 return (flags & NIX_TX_NEED_EXT_HDR) ?
92 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
96 static __rte_always_inline uint64_t
97 cn10k_nix_tx_steor_data(const uint16_t flags)
99 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
102 /* This will be moved to addr area */
104 /* 15 vector sizes for single seg */
124 static __rte_always_inline uint8_t
125 cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
127 return ((flags & NIX_TX_NEED_EXT_HDR) ?
128 (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
132 static __rte_always_inline uint64_t
133 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
135 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
138 /* This will be moved to addr area */
140 /* 15 vector sizes for single seg */
160 static __rte_always_inline uint64_t
161 cn10k_cpt_tx_steor_data(void)
163 /* We have two CPT instructions per LMTLine */
164 const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
167 /* This will be moved to addr area */
188 static __rte_always_inline void
189 cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
190 const uint16_t flags, const uint16_t static_sz)
193 cmd[0] = txq->send_hdr_w0;
195 cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
196 ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
199 if (flags & NIX_TX_NEED_EXT_HDR) {
200 if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
201 cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
203 cmd[2] = NIX_SUBDC_EXT << 60;
205 cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
207 cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
211 static __rte_always_inline void
212 cn10k_nix_sec_fc_wait(struct cn10k_eth_txq *txq, uint16_t nb_pkts)
214 int32_t nb_desc, val, newval;
216 volatile uint64_t *fc;
218 /* Check if there is any CPT instruction to submit */
223 fc_sw = txq->cpt_fc_sw;
224 val = __atomic_sub_fetch(fc_sw, nb_pkts, __ATOMIC_RELAXED);
225 if (likely(val >= 0))
228 nb_desc = txq->cpt_desc;
231 newval = nb_desc - __atomic_load_n(fc, __ATOMIC_RELAXED);
237 if (!__atomic_compare_exchange_n(fc_sw, &val, newval, false,
238 __ATOMIC_RELAXED, __ATOMIC_RELAXED))
242 static __rte_always_inline void
243 cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
244 uint8_t loff, uint8_t shft)
249 /* Check if there is any CPT instruction to submit */
253 data = cn10k_cpt_tx_steor_data();
254 /* Update lmtline use for partial end line */
256 data &= ~(0x7ULL << shft);
257 /* Update it to half full i.e 64B */
258 data |= (0x3UL << shft);
261 pa = io_addr | ((data >> 16) & 0x7) << 4;
262 data &= ~(0x7ULL << 16);
263 /* Update lines - 1 that contain valid data */
264 data |= ((uint64_t)(lnum + loff - 1)) << 12;
268 roc_lmt_submit_steorl(data, pa);
271 #if defined(RTE_ARCH_ARM64)
272 static __rte_always_inline void
273 cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
274 uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
275 uint8_t *loff, uint8_t *shft, uint64_t sa_base,
276 const uint16_t flags)
278 struct cn10k_sec_sess_priv sess_priv;
279 uint32_t pkt_len, dlen_adj, rlen;
280 uint8_t l3l4type, chksum;
281 uint64x2_t cmd01, cmd23;
282 uint8_t l2_len, l3_len;
283 uintptr_t dptr, nixtx;
284 uint64_t ucode_cmd[4];
289 sess_priv.u64 = *rte_security_dynfield(m);
291 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
292 /* Extract l3l4type either from il3il4type or ol3ol4type */
293 if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
294 flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
295 l2_len = vgetq_lane_u8(*cmd0, 10);
296 /* L4 ptr from send hdr includes l2 and l3 len */
297 l3_len = vgetq_lane_u8(*cmd0, 11) - l2_len;
298 l3l4type = vgetq_lane_u8(*cmd0, 13);
300 l2_len = vgetq_lane_u8(*cmd0, 8);
301 /* L4 ptr from send hdr includes l2 and l3 len */
302 l3_len = vgetq_lane_u8(*cmd0, 9) - l2_len;
303 l3l4type = vgetq_lane_u8(*cmd0, 12);
306 chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
308 sess_priv.chksum = sess_priv.chksum & chksum;
309 /* Clear SEND header flags */
310 *cmd0 = vsetq_lane_u16(0, *cmd0, 6);
317 dptr = vgetq_lane_u64(*cmd1, 1);
318 pkt_len = vgetq_lane_u16(*cmd0, 0);
320 /* Calculate dlen adj */
321 dlen_adj = pkt_len - l2_len;
322 /* Exclude l3 len from roundup for transport mode */
323 dlen_adj -= sess_priv.mode ? 0 : l3_len;
324 rlen = (dlen_adj + sess_priv.roundup_len) +
325 (sess_priv.roundup_byte - 1);
326 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
327 rlen += sess_priv.partial_len;
328 dlen_adj = rlen - dlen_adj;
330 /* Update send descriptors. Security is single segment only */
331 *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
332 *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
334 /* Get area where NIX descriptor needs to be stored */
335 nixtx = dptr + pkt_len + dlen_adj;
337 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
339 /* Return nixtx addr */
340 *nixtx_addr = (nixtx + 16);
342 /* DLEN passed is excluding L2HDR */
344 tag = sa_base & 0xFFFFUL;
345 sa_base &= ~0xFFFFUL;
346 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
347 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
348 ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
349 ((uint64_t)sess_priv.chksum) << 32 |
350 ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
352 /* CPT Word 0 and Word 1 */
353 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
354 /* CPT_RES_S is 16B above NIXTX */
355 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
357 /* CPT word 2 and 3 */
358 cmd23 = vdupq_n_u64(0);
359 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
360 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
361 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
365 if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
366 if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
367 *((uint16_t *)(dptr - 2)) =
368 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
370 *((uint16_t *)(dptr - 2)) =
371 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
377 /* Move to our line */
378 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
380 /* Write CPT instruction to lmt line */
381 vst1q_u64(laddr, cmd01);
382 vst1q_u64((laddr + 2), cmd23);
384 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
385 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
387 /* Move to next line for every other CPT inst */
389 *lnum = *lnum + (*loff ? 0 : 1);
390 *shft = *shft + (*loff ? 0 : 3);
393 static __rte_always_inline void
394 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
395 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
396 uint64_t sa_base, const uint16_t flags)
398 struct cn10k_sec_sess_priv sess_priv;
399 uint32_t pkt_len, dlen_adj, rlen;
400 struct nix_send_hdr_s *send_hdr;
401 uint8_t l3l4type, chksum;
402 uint64x2_t cmd01, cmd23;
403 union nix_send_sg_s *sg;
404 uint8_t l2_len, l3_len;
405 uintptr_t dptr, nixtx;
406 uint64_t ucode_cmd[4];
411 /* Move to our line from base */
412 sess_priv.u64 = *rte_security_dynfield(m);
413 send_hdr = (struct nix_send_hdr_s *)cmd;
414 if (flags & NIX_TX_NEED_EXT_HDR)
415 sg = (union nix_send_sg_s *)&cmd[4];
417 sg = (union nix_send_sg_s *)&cmd[2];
419 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
420 /* Extract l3l4type either from il3il4type or ol3ol4type */
421 if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
422 flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
423 l2_len = (cmd[1] >> 16) & 0xFF;
424 /* L4 ptr from send hdr includes l2 and l3 len */
425 l3_len = ((cmd[1] >> 24) & 0xFF) - l2_len;
426 l3l4type = (cmd[1] >> 40) & 0xFF;
428 l2_len = cmd[1] & 0xFF;
429 /* L4 ptr from send hdr includes l2 and l3 len */
430 l3_len = ((cmd[1] >> 8) & 0xFF) - l2_len;
431 l3l4type = (cmd[1] >> 32) & 0xFF;
434 chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
436 sess_priv.chksum = sess_priv.chksum & chksum;
437 /* Clear SEND header flags */
438 cmd[1] &= ~(0xFFFFUL << 32);
445 dptr = *(uint64_t *)(sg + 1);
446 pkt_len = send_hdr->w0.total;
448 /* Calculate dlen adj */
449 dlen_adj = pkt_len - l2_len;
450 /* Exclude l3 len from roundup for transport mode */
451 dlen_adj -= sess_priv.mode ? 0 : l3_len;
452 rlen = (dlen_adj + sess_priv.roundup_len) +
453 (sess_priv.roundup_byte - 1);
454 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
455 rlen += sess_priv.partial_len;
456 dlen_adj = rlen - dlen_adj;
458 /* Update send descriptors. Security is single segment only */
459 send_hdr->w0.total = pkt_len + dlen_adj;
460 sg->seg1_size = pkt_len + dlen_adj;
462 /* Get area where NIX descriptor needs to be stored */
463 nixtx = dptr + pkt_len + dlen_adj;
465 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
467 /* Return nixtx addr */
468 *nixtx_addr = (nixtx + 16);
470 /* DLEN passed is excluding L2HDR */
472 tag = sa_base & 0xFFFFUL;
473 sa_base &= ~0xFFFFUL;
474 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
475 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
476 ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
477 ((uint64_t)sess_priv.chksum) << 32 |
478 ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
480 /* CPT Word 0 and Word 1. Assume no multi-seg support */
481 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
482 /* CPT_RES_S is 16B above NIXTX */
483 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
485 /* CPT word 2 and 3 */
486 cmd23 = vdupq_n_u64(0);
487 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
488 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
489 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
493 if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
494 if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
495 *((uint16_t *)(dptr - 2)) =
496 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
498 *((uint16_t *)(dptr - 2)) =
499 rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
504 /* Move to our line */
505 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
507 /* Write CPT instruction to lmt line */
508 vst1q_u64(laddr, cmd01);
509 vst1q_u64((laddr + 2), cmd23);
511 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
512 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
514 /* Move to next line for every other CPT inst */
516 *lnum = *lnum + (*loff ? 0 : 1);
517 *shft = *shft + (*loff ? 0 : 3);
522 static __rte_always_inline void
523 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
524 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
525 uint64_t sa_base, const uint16_t flags)
529 RTE_SET_USED(nixtx_addr);
534 RTE_SET_USED(sa_base);
539 static __rte_always_inline void
540 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
542 uint64_t mask, ol_flags = m->ol_flags;
544 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
545 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
546 uint16_t *iplen, *oiplen, *oudplen;
547 uint16_t lso_sb, paylen;
549 mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
550 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
551 m->l2_len + m->l3_len + m->l4_len;
553 /* Reduce payload len from base headers */
554 paylen = m->pkt_len - lso_sb;
556 /* Get iplen position assuming no tunnel hdr */
557 iplen = (uint16_t *)(mdata + m->l2_len +
558 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
559 /* Handle tunnel tso */
560 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
561 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
562 const uint8_t is_udp_tun =
563 (CNXK_NIX_UDP_TUN_BITMASK >>
564 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
567 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
569 RTE_MBUF_F_TX_OUTER_IPV6)));
570 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
573 /* Update format for UDP tunneled packet */
575 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
576 m->outer_l3_len + 4);
577 *oudplen = rte_cpu_to_be_16(
578 rte_be_to_cpu_16(*oudplen) - paylen);
581 /* Update iplen position to inner ip hdr */
582 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
584 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
587 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
591 static __rte_always_inline void
592 cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
593 const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
596 uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
597 struct nix_send_ext_s *send_hdr_ext;
598 struct nix_send_hdr_s *send_hdr;
599 uint64_t ol_flags = 0, mask;
600 union nix_send_hdr_w1_u w1;
601 union nix_send_sg_s *sg;
602 uint16_t mark_form = 0;
604 send_hdr = (struct nix_send_hdr_s *)cmd;
605 if (flags & NIX_TX_NEED_EXT_HDR) {
606 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
607 sg = (union nix_send_sg_s *)(cmd + 4);
608 /* Clear previous markings */
609 send_hdr_ext->w0.lso = 0;
610 send_hdr_ext->w0.mark_en = 0;
611 send_hdr_ext->w1.u = 0;
612 ol_flags = m->ol_flags;
614 sg = (union nix_send_sg_s *)(cmd + 2);
617 if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
618 ol_flags = m->ol_flags;
622 if (!(flags & NIX_TX_MULTI_SEG_F))
623 send_hdr->w0.total = m->data_len;
625 send_hdr->w0.total = m->pkt_len;
626 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
630 * 3 => IPV4 with csum
632 * L3type and L3ptr needs to be set for either
633 * L3 csum or L4 csum or LSO
637 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
638 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
639 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
640 const uint8_t ol3type =
641 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
642 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
643 !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
646 w1.ol3type = ol3type;
647 mask = 0xffffull << ((!!ol3type) << 4);
648 w1.ol3ptr = ~mask & m->outer_l2_len;
649 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
652 w1.ol4type = csum + (csum << 1);
655 w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
656 ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
657 w1.il3ptr = w1.ol4ptr + m->l2_len;
658 w1.il4ptr = w1.il3ptr + m->l3_len;
659 /* Increment it by 1 if it is IPV4 as 3 is with csum */
660 w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
663 w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
665 /* In case of no tunnel header use only
666 * shift IL3/IL4 fields a bit to use
667 * OL3/OL4 for header checksum
670 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
671 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
673 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
674 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
675 const uint8_t outer_l2_len = m->outer_l2_len;
678 w1.ol3ptr = outer_l2_len;
679 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
680 /* Increment it by 1 if it is IPV4 as 3 is with csum */
681 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
682 ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
683 !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
686 w1.ol4type = csum + (csum << 1);
688 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
689 const uint8_t l2_len = m->l2_len;
691 /* Always use OLXPTR and OLXTYPE when only
692 * when one header is present
697 w1.ol4ptr = l2_len + m->l3_len;
698 /* Increment it by 1 if it is IPV4 as 3 is with csum */
699 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
700 ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
701 !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
704 w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
707 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
708 const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
709 const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
710 RTE_MBUF_F_TX_IPV6));
712 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
713 /* HW will update ptr after vlan0 update */
714 send_hdr_ext->w1.vlan1_ins_ptr = 12;
715 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
717 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
718 /* 2B before end of l2 header */
719 send_hdr_ext->w1.vlan0_ins_ptr = 12;
720 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
721 /* Fill for VLAN marking only when VLAN insertion enabled */
722 mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
723 (send_hdr_ext->w1.vlan1_ins_ena ||
724 send_hdr_ext->w1.vlan0_ins_ena));
726 /* Mask requested flags with packet data information */
727 mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
728 mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);
730 mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
731 mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
732 markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);
734 send_hdr_ext->w0.mark_en = !!mark_off;
735 send_hdr_ext->w0.markform = mark_form & 0x7F;
736 send_hdr_ext->w0.markptr = markptr;
739 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
743 mask = -(!w1.il3type);
744 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
746 send_hdr_ext->w0.lso_sb = lso_sb;
747 send_hdr_ext->w0.lso = 1;
748 send_hdr_ext->w0.lso_mps = m->tso_segsz;
749 send_hdr_ext->w0.lso_format =
750 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
751 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
753 /* Handle tunnel tso */
754 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
755 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
756 const uint8_t is_udp_tun =
757 (CNXK_NIX_UDP_TUN_BITMASK >>
758 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
760 uint8_t shift = is_udp_tun ? 32 : 0;
762 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
763 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
765 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
766 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
767 /* Update format for UDP tunneled packet */
768 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
772 if (flags & NIX_TX_NEED_SEND_HDR_W1)
773 send_hdr->w1.u = w1.u;
775 if (!(flags & NIX_TX_MULTI_SEG_F)) {
776 sg->seg1_size = send_hdr->w0.total;
777 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
779 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
780 /* DF bit = 1 if refcount of current mbuf or parent mbuf
782 * DF bit = 0 otherwise
784 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
786 /* Mark mempool object as "put" since it is freed by NIX */
787 if (!send_hdr->w0.df)
788 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
790 sg->seg1_size = m->data_len;
791 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
793 /* NOFF is handled later for multi-seg */
796 if (flags & NIX_TX_OFFLOAD_SECURITY_F)
797 *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
800 static __rte_always_inline void
801 cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
802 const uint16_t flags)
804 struct nix_send_ext_s *send_hdr_ext;
805 union nix_send_sg_s *sg;
807 /* With minimal offloads, 'cmd' being local could be optimized out to
808 * registers. In other cases, 'cmd' will be in stack. Intent is
809 * 'cmd' stores content from txq->cmd which is copied only once.
811 *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
813 if (flags & NIX_TX_NEED_EXT_HDR) {
814 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
815 *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
818 sg = (union nix_send_sg_s *)(cmd + 4);
820 sg = (union nix_send_sg_s *)(cmd + 2);
822 /* In case of multi-seg, sg template is stored here */
823 *((union nix_send_sg_s *)lmt_addr) = *sg;
824 *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
827 static __rte_always_inline void
828 cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
829 const uint64_t ol_flags, const uint16_t no_segdw,
830 const uint16_t flags)
832 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
833 const uint8_t is_ol_tstamp =
834 !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
835 uint64_t *lmt = (uint64_t *)lmt_addr;
836 uint16_t off = (no_segdw - 1) << 1;
837 struct nix_send_mem_s *send_mem;
839 send_mem = (struct nix_send_mem_s *)(lmt + off);
840 /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
841 * should not be recorded, hence changing the alg type to
842 * NIX_SENDMEMALG_SUB and also changing send mem addr field to
843 * next 8 bytes as it corrupts the actual Tx tstamp registered
846 send_mem->w0.subdc = NIX_SUBDC_MEM;
848 NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
850 (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
854 static __rte_always_inline uint16_t
855 cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
857 struct nix_send_hdr_s *send_hdr;
858 union nix_send_sg_s *sg;
859 struct rte_mbuf *m_next;
860 uint64_t *slist, sg_u;
865 send_hdr = (struct nix_send_hdr_s *)cmd;
867 if (flags & NIX_TX_NEED_EXT_HDR)
872 sg = (union nix_send_sg_s *)&cmd[2 + off];
874 /* Start from second segment, first segment is already there */
877 nb_segs = m->nb_segs - 1;
879 slist = &cmd[3 + off + 1];
881 /* Set invert df if buffer is not to be freed by H/W */
882 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
883 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
885 /* Mark mempool object as "put" since it is freed by NIX */
886 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
887 if (!(sg_u & (1ULL << 55)))
888 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
895 /* Fill mbuf segments */
898 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
899 *slist = rte_mbuf_data_iova(m);
900 /* Set invert df if buffer is not to be freed by H/W */
901 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
902 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
903 /* Mark mempool object as "put" since it is freed by NIX
905 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
906 if (!(sg_u & (1ULL << (i + 55))))
907 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
912 if (i > 2 && nb_segs) {
914 /* Next SG subdesc */
915 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
918 sg = (union nix_send_sg_s *)slist;
928 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
929 /* Roundup extra dwords to multiple of 2 */
930 segdw = (segdw >> 1) + (segdw & 0x1);
932 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
933 send_hdr->w0.sizem1 = segdw - 1;
938 static __rte_always_inline uint16_t
939 cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
940 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
942 struct cn10k_eth_txq *txq = tx_queue;
943 const rte_iova_t io_addr = txq->io_addr;
944 uint8_t lnum, c_lnum, c_shft, c_loff;
945 uintptr_t pa, lbase = txq->lmt_base;
946 uint16_t lmt_id, burst, left, i;
947 uintptr_t c_lbase = lbase;
948 uint64_t lso_tun_fmt = 0;
949 uint64_t mark_fmt = 0;
950 uint8_t mark_flag = 0;
951 rte_iova_t c_io_addr;
958 if (!(flags & NIX_TX_VWQE_F)) {
959 NIX_XMIT_FC_OR_RETURN(txq, pkts);
960 /* Reduce the cached count */
961 txq->fc_cache_pkts -= pkts;
963 /* Get cmd skeleton */
964 cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
966 if (flags & NIX_TX_OFFLOAD_TSO_F)
967 lso_tun_fmt = txq->lso_tun_fmt;
969 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
970 mark_fmt = txq->mark_fmt;
971 mark_flag = txq->mark_flag;
974 /* Get LMT base address and LMT ID as lcore id */
975 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
976 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
977 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
978 c_io_addr = txq->cpt_io_addr;
979 sa_base = txq->sa_base;
984 burst = left > 32 ? 32 : left;
987 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
993 for (i = 0; i < burst; i++) {
994 /* Perform header writes for TSO, barrier at
995 * lmt steorl will suffice.
997 if (flags & NIX_TX_OFFLOAD_TSO_F)
998 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1000 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
1001 &sec, mark_flag, mark_fmt);
1003 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
1005 /* Prepare CPT instruction and get nixtx addr */
1006 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
1007 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
1008 &c_lnum, &c_loff, &c_shft, sa_base,
1011 /* Move NIX desc to LMT/NIXTX area */
1012 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
1013 cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
1015 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
1019 if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
1020 ws[1] = roc_sso_hws_head_wait(ws[0]);
1025 /* Submit CPT instructions if any */
1026 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1027 /* Reduce pkts to be sent to CPT */
1028 burst -= ((c_lnum << 1) + c_loff);
1029 cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
1030 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
1036 data = cn10k_nix_tx_steor_data(flags);
1037 pa = io_addr | (data & 0x7) << 4;
1039 data |= (15ULL << 12);
1040 data |= (uint64_t)lmt_id;
1043 roc_lmt_submit_steorl(data, pa);
1045 data = cn10k_nix_tx_steor_data(flags);
1046 pa = io_addr | (data & 0x7) << 4;
1048 data |= ((uint64_t)(burst - 17)) << 12;
1049 data |= (uint64_t)(lmt_id + 16);
1052 roc_lmt_submit_steorl(data, pa);
1054 data = cn10k_nix_tx_steor_data(flags);
1055 pa = io_addr | (data & 0x7) << 4;
1057 data |= ((uint64_t)(burst - 1)) << 12;
1061 roc_lmt_submit_steorl(data, pa);
1071 static __rte_always_inline uint16_t
1072 cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
1073 struct rte_mbuf **tx_pkts, uint16_t pkts,
1074 uint64_t *cmd, const uint16_t flags)
1076 struct cn10k_eth_txq *txq = tx_queue;
1077 uintptr_t pa0, pa1, lbase = txq->lmt_base;
1078 const rte_iova_t io_addr = txq->io_addr;
1079 uint16_t segdw, lmt_id, burst, left, i;
1080 uint8_t lnum, c_lnum, c_loff;
1081 uintptr_t c_lbase = lbase;
1082 uint64_t lso_tun_fmt = 0;
1083 uint64_t mark_fmt = 0;
1084 uint8_t mark_flag = 0;
1085 uint64_t data0, data1;
1086 rte_iova_t c_io_addr;
1087 uint8_t shft, c_shft;
1088 __uint128_t data128;
1094 if (!(flags & NIX_TX_VWQE_F)) {
1095 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1096 /* Reduce the cached count */
1097 txq->fc_cache_pkts -= pkts;
1099 /* Get cmd skeleton */
1100 cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
1102 if (flags & NIX_TX_OFFLOAD_TSO_F)
1103 lso_tun_fmt = txq->lso_tun_fmt;
1105 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1106 mark_fmt = txq->mark_fmt;
1107 mark_flag = txq->mark_flag;
1110 /* Get LMT base address and LMT ID as lcore id */
1111 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
1112 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1113 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
1114 c_io_addr = txq->cpt_io_addr;
1115 sa_base = txq->sa_base;
1120 burst = left > 32 ? 32 : left;
1125 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1131 for (i = 0; i < burst; i++) {
1132 /* Perform header writes for TSO, barrier at
1133 * lmt steorl will suffice.
1135 if (flags & NIX_TX_OFFLOAD_TSO_F)
1136 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1138 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
1139 &sec, mark_flag, mark_fmt);
1141 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
1143 /* Prepare CPT instruction and get nixtx addr */
1144 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
1145 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
1146 &c_lnum, &c_loff, &c_shft, sa_base,
1149 /* Move NIX desc to LMT/NIXTX area */
1150 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
1151 /* Store sg list directly on lmt line */
1152 segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
1154 cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
1156 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
1158 data128 |= (((__uint128_t)(segdw - 1)) << shft);
1163 if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
1164 ws[1] = roc_sso_hws_head_wait(ws[0]);
1169 /* Submit CPT instructions if any */
1170 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1171 /* Reduce pkts to be sent to CPT */
1172 burst -= ((c_lnum << 1) + c_loff);
1173 cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
1174 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
1178 data0 = (uint64_t)data128;
1179 data1 = (uint64_t)(data128 >> 64);
1180 /* Make data0 similar to data1 */
1184 pa0 = io_addr | (data0 & 0x7) << 4;
1186 /* Move lmtst1..15 sz to bits 63:19 */
1188 data0 |= (15ULL << 12);
1189 data0 |= (uint64_t)lmt_id;
1192 roc_lmt_submit_steorl(data0, pa0);
1194 pa1 = io_addr | (data1 & 0x7) << 4;
1197 data1 |= ((uint64_t)(burst - 17)) << 12;
1198 data1 |= (uint64_t)(lmt_id + 16);
1201 roc_lmt_submit_steorl(data1, pa1);
1203 pa0 = io_addr | (data0 & 0x7) << 4;
1205 /* Move lmtst1..15 sz to bits 63:19 */
1207 data0 |= ((burst - 1) << 12);
1208 data0 |= (uint64_t)lmt_id;
1211 roc_lmt_submit_steorl(data0, pa0);
1221 #if defined(RTE_ARCH_ARM64)
1223 static __rte_always_inline void
1224 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
1225 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
1226 const uint64_t flags, const uint64_t lso_tun_fmt)
1231 if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
1234 mask = -(!w1->il3type);
1235 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
1238 w0->lso_sb = lso_sb;
1239 w0->lso_mps = m->tso_segsz;
1240 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
1241 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
1243 /* Handle tunnel tso */
1244 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
1245 (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
1246 const uint8_t is_udp_tun =
1247 (CNXK_NIX_UDP_TUN_BITMASK >>
1248 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
1250 uint8_t shift = is_udp_tun ? 32 : 0;
1252 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
1253 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
1255 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
1256 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
1257 /* Update format for UDP tunneled packet */
1259 w0->lso_format = (lso_tun_fmt >> shift);
1263 static __rte_always_inline void
1264 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
1265 union nix_send_hdr_w0_u *sh,
1266 union nix_send_sg_s *sg, const uint32_t flags)
1268 struct rte_mbuf *m_next;
1269 uint64_t *slist, sg_u;
1273 sh->total = m->pkt_len;
1274 /* Clear sg->u header before use */
1275 sg->u &= 0xFC00000000000000;
1279 sg_u = sg_u | ((uint64_t)m->data_len);
1281 nb_segs = m->nb_segs - 1;
1284 /* Set invert df if buffer is not to be freed by H/W */
1285 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1286 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
1287 /* Mark mempool object as "put" since it is freed by NIX */
1288 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1289 if (!(sg_u & (1ULL << 55)))
1290 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1295 /* Fill mbuf segments */
1298 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
1299 *slist = rte_mbuf_data_iova(m);
1300 /* Set invert df if buffer is not to be freed by H/W */
1301 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1302 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
1303 /* Mark mempool object as "put" since it is freed by NIX
1305 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1306 if (!(sg_u & (1ULL << (i + 55))))
1307 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1313 if (i > 2 && nb_segs) {
1315 /* Next SG subdesc */
1316 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
1319 sg = (union nix_send_sg_s *)slist;
1330 static __rte_always_inline void
1331 cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
1332 uint64x2_t *cmd1, const uint8_t segdw,
1333 const uint32_t flags)
1335 union nix_send_hdr_w0_u sh;
1336 union nix_send_sg_s sg;
1338 if (m->nb_segs == 1) {
1339 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1340 sg.u = vgetq_lane_u64(cmd1[0], 0);
1341 sg.u |= (cnxk_nix_prefree_seg(m) << 55);
1342 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1345 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1346 sg.u = vgetq_lane_u64(cmd1[0], 0);
1347 if (!(sg.u & (1ULL << 55)))
1348 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1354 sh.u = vgetq_lane_u64(cmd0[0], 0);
1355 sg.u = vgetq_lane_u64(cmd1[0], 0);
1357 cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
1359 sh.sizem1 = segdw - 1;
1360 cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
1361 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1364 #define NIX_DESCS_PER_LOOP 4
1366 static __rte_always_inline uint8_t
1367 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1368 uint64x2_t *cmd1, uint64x2_t *cmd2,
1369 uint64x2_t *cmd3, uint8_t *segdw,
1370 uint64_t *lmt_addr, __uint128_t *data128,
1371 uint8_t *shift, const uint16_t flags)
1373 uint8_t j, off, lmt_used;
1375 if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1376 !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1377 /* No segments in 4 consecutive packets. */
1378 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1379 for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1380 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1383 vst1q_u64(lmt_addr, cmd0[0]);
1384 vst1q_u64(lmt_addr + 2, cmd1[0]);
1385 vst1q_u64(lmt_addr + 4, cmd0[1]);
1386 vst1q_u64(lmt_addr + 6, cmd1[1]);
1387 vst1q_u64(lmt_addr + 8, cmd0[2]);
1388 vst1q_u64(lmt_addr + 10, cmd1[2]);
1389 vst1q_u64(lmt_addr + 12, cmd0[3]);
1390 vst1q_u64(lmt_addr + 14, cmd1[3]);
1392 *data128 |= ((__uint128_t)7) << *shift;
1400 for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1401 /* Fit consecutive packets in same LMTLINE. */
1402 if ((segdw[j] + segdw[j + 1]) <= 8) {
1403 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1404 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1407 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1410 segdw[j + 1], flags);
1411 /* TSTAMP takes 4 each, no segs. */
1412 vst1q_u64(lmt_addr, cmd0[j]);
1413 vst1q_u64(lmt_addr + 2, cmd2[j]);
1414 vst1q_u64(lmt_addr + 4, cmd1[j]);
1415 vst1q_u64(lmt_addr + 6, cmd3[j]);
1417 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1418 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1419 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1420 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1421 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1422 /* EXT header take 3 each, space for 2 segs.*/
1423 cn10k_nix_prepare_mseg_vec(mbufs[j],
1427 vst1q_u64(lmt_addr, cmd0[j]);
1428 vst1q_u64(lmt_addr + 2, cmd2[j]);
1429 vst1q_u64(lmt_addr + 4, cmd1[j]);
1432 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1433 lmt_addr + 12 + off,
1436 segdw[j + 1], flags);
1437 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1438 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1439 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1441 cn10k_nix_prepare_mseg_vec(mbufs[j],
1445 vst1q_u64(lmt_addr, cmd0[j]);
1446 vst1q_u64(lmt_addr + 2, cmd1[j]);
1449 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1453 segdw[j + 1], flags);
1454 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1455 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1457 *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1462 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1463 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1464 cn10k_nix_prepare_mseg_vec(mbufs[j],
1468 vst1q_u64(lmt_addr, cmd0[j]);
1469 vst1q_u64(lmt_addr + 2, cmd2[j]);
1470 vst1q_u64(lmt_addr + 4, cmd1[j]);
1473 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1474 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1475 cn10k_nix_prepare_mseg_vec(mbufs[j],
1479 vst1q_u64(lmt_addr, cmd0[j]);
1480 vst1q_u64(lmt_addr + 2, cmd2[j]);
1481 vst1q_u64(lmt_addr + 4, cmd1[j]);
1483 cn10k_nix_prepare_mseg_vec(mbufs[j],
1487 vst1q_u64(lmt_addr, cmd0[j]);
1488 vst1q_u64(lmt_addr + 2, cmd1[j]);
1490 *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1501 static __rte_always_inline void
1502 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1503 uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1505 /* Go to next line if we are out of space */
1506 if ((*loff + (dw << 4)) > 128) {
1507 *data128 = *data128 |
1508 (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1509 *shift = *shift + 3;
1514 *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1515 *loff = *loff + (dw << 4);
1518 static __rte_always_inline void
1519 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1520 uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1521 uint64x2_t cmd3, const uint16_t flags)
1525 /* Handle no fast free when security is enabled without mseg */
1526 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1527 (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1528 !(flags & NIX_TX_MULTI_SEG_F)) {
1529 union nix_send_sg_s sg;
1531 sg.u = vgetq_lane_u64(cmd1, 0);
1532 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1533 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1535 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1536 sg.u = vgetq_lane_u64(cmd1, 0);
1537 if (!(sg.u & (1ULL << 55)))
1538 RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1543 if (flags & NIX_TX_MULTI_SEG_F) {
1544 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1545 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1546 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1547 &cmd0, &cmd1, segdw, flags);
1548 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1549 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1550 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1553 vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1554 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1555 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1556 &cmd0, &cmd1, segdw, flags);
1557 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1558 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1559 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1561 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1562 &cmd0, &cmd1, segdw, flags);
1563 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1564 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1566 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1567 /* Store the prepared send desc to LMT lines */
1568 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1569 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1570 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1571 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1572 vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1574 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1575 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1576 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1579 /* Store the prepared send desc to LMT lines */
1580 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1581 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1585 static __rte_always_inline uint16_t
1586 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1587 struct rte_mbuf **tx_pkts, uint16_t pkts,
1588 uint64_t *cmd, const uint16_t flags)
1590 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1591 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1592 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1593 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1594 uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1595 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1596 uint64x2_t senddesc01_w0, senddesc23_w0;
1597 uint64x2_t senddesc01_w1, senddesc23_w1;
1598 uint64x2_t sendext01_w0, sendext23_w0;
1599 uint64x2_t sendext01_w1, sendext23_w1;
1600 uint64x2_t sendmem01_w0, sendmem23_w0;
1601 uint64x2_t sendmem01_w1, sendmem23_w1;
1602 uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1603 uint64x2_t sgdesc01_w0, sgdesc23_w0;
1604 uint64x2_t sgdesc01_w1, sgdesc23_w1;
1605 struct cn10k_eth_txq *txq = tx_queue;
1606 rte_iova_t io_addr = txq->io_addr;
1607 uintptr_t laddr = txq->lmt_base;
1608 uint8_t c_lnum, c_shft, c_loff;
1609 uint64x2_t ltypes01, ltypes23;
1610 uint64x2_t xtmp128, ytmp128;
1611 uint64x2_t xmask01, xmask23;
1612 uintptr_t c_laddr = laddr;
1613 uint8_t lnum, shift, loff;
1614 rte_iova_t c_io_addr;
1617 __uint128_t data128;
1621 if (!(flags & NIX_TX_VWQE_F)) {
1622 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1623 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1624 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1625 /* Reduce the cached count */
1626 txq->fc_cache_pkts -= pkts;
1628 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1629 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1632 /* Perform header writes before barrier for TSO */
1633 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1634 for (i = 0; i < pkts; i++)
1635 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1638 if (!(flags & NIX_TX_VWQE_F)) {
1639 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1642 (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1643 ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1645 senddesc01_w0 = vdupq_n_u64(w0);
1647 senddesc23_w0 = senddesc01_w0;
1649 senddesc01_w1 = vdupq_n_u64(0);
1650 senddesc23_w1 = senddesc01_w1;
1651 sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1652 sgdesc23_w0 = sgdesc01_w0;
1654 if (flags & NIX_TX_NEED_EXT_HDR) {
1655 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1656 sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1659 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1660 (NIX_SENDMEMALG_SETTSTMP << 56));
1661 sendmem23_w0 = sendmem01_w0;
1662 sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1663 sendmem23_w1 = sendmem01_w1;
1665 sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1667 sendext23_w0 = sendext01_w0;
1669 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1670 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1672 sendext01_w1 = vdupq_n_u64(0);
1673 sendext23_w1 = sendext01_w1;
1676 /* Get LMT base address and LMT ID as lcore id */
1677 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1678 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1679 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1680 c_io_addr = txq->cpt_io_addr;
1681 sa_base = txq->sa_base;
1686 /* Number of packets to prepare depends on offloads enabled. */
1687 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1688 cn10k_nix_pkts_per_vec_brst(flags) :
1690 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1695 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1702 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1703 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1708 if (flags & NIX_TX_MULTI_SEG_F) {
1711 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1712 struct rte_mbuf *m = tx_pkts[j];
1714 /* Get dwords based on nb_segs. */
1715 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1716 /* Add dwords based on offloads. */
1717 segdw[j] += 1 + /* SEND HDR */
1718 !!(flags & NIX_TX_NEED_EXT_HDR) +
1719 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1722 /* Check if there are enough LMTLINES for this loop */
1723 if (lnum + 4 > 32) {
1724 uint8_t ldwords_con = 0, lneeded = 0;
1725 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1726 ldwords_con += segdw[j];
1727 if (ldwords_con > 8) {
1729 ldwords_con = segdw[j];
1733 if (lnum + lneeded > 32) {
1739 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1741 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1742 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1744 senddesc23_w0 = senddesc01_w0;
1745 sgdesc23_w0 = sgdesc01_w0;
1747 /* Clear vlan enables. */
1748 if (flags & NIX_TX_NEED_EXT_HDR) {
1749 sendext01_w1 = vbicq_u64(sendext01_w1,
1750 vdupq_n_u64(0x3FFFF00FFFF00));
1751 sendext23_w1 = sendext01_w1;
1754 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1755 /* Reset send mem alg to SETTSTMP from SUB*/
1756 sendmem01_w0 = vbicq_u64(sendmem01_w0,
1757 vdupq_n_u64(BIT_ULL(59)));
1758 /* Reset send mem address to default. */
1760 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1761 sendmem23_w0 = sendmem01_w0;
1762 sendmem23_w1 = sendmem01_w1;
1765 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1766 /* Clear the LSO enable bit. */
1767 sendext01_w0 = vbicq_u64(sendext01_w0,
1768 vdupq_n_u64(BIT_ULL(14)));
1769 sendext23_w0 = sendext01_w0;
1772 /* Move mbufs to iova */
1773 mbuf0 = (uint64_t *)tx_pkts[0];
1774 mbuf1 = (uint64_t *)tx_pkts[1];
1775 mbuf2 = (uint64_t *)tx_pkts[2];
1776 mbuf3 = (uint64_t *)tx_pkts[3];
1778 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1779 offsetof(struct rte_mbuf, buf_iova));
1780 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1781 offsetof(struct rte_mbuf, buf_iova));
1782 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1783 offsetof(struct rte_mbuf, buf_iova));
1784 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1785 offsetof(struct rte_mbuf, buf_iova));
1787 * Get mbuf's, olflags, iova, pktlen, dataoff
1788 * dataoff_iovaX.D[0] = iova,
1789 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1790 * len_olflagsX.D[0] = ol_flags,
1791 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1793 dataoff_iova0 = vld1q_u64(mbuf0);
1794 len_olflags0 = vld1q_u64(mbuf0 + 2);
1795 dataoff_iova1 = vld1q_u64(mbuf1);
1796 len_olflags1 = vld1q_u64(mbuf1 + 2);
1797 dataoff_iova2 = vld1q_u64(mbuf2);
1798 len_olflags2 = vld1q_u64(mbuf2 + 2);
1799 dataoff_iova3 = vld1q_u64(mbuf3);
1800 len_olflags3 = vld1q_u64(mbuf3 + 2);
1802 /* Move mbufs to point pool */
1803 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1804 offsetof(struct rte_mbuf, pool) -
1805 offsetof(struct rte_mbuf, buf_iova));
1806 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1807 offsetof(struct rte_mbuf, pool) -
1808 offsetof(struct rte_mbuf, buf_iova));
1809 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1810 offsetof(struct rte_mbuf, pool) -
1811 offsetof(struct rte_mbuf, buf_iova));
1812 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1813 offsetof(struct rte_mbuf, pool) -
1814 offsetof(struct rte_mbuf, buf_iova));
1816 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1817 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1818 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1820 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1821 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1824 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1825 : [a] "+w"(senddesc01_w1)
1826 : [in] "r"(mbuf0 + 2)
1829 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1830 : [a] "+w"(senddesc01_w1)
1831 : [in] "r"(mbuf1 + 2)
1834 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1835 : [b] "+w"(senddesc23_w1)
1836 : [in] "r"(mbuf2 + 2)
1839 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1840 : [b] "+w"(senddesc23_w1)
1841 : [in] "r"(mbuf3 + 2)
1844 /* Get pool pointer alone */
1845 mbuf0 = (uint64_t *)*mbuf0;
1846 mbuf1 = (uint64_t *)*mbuf1;
1847 mbuf2 = (uint64_t *)*mbuf2;
1848 mbuf3 = (uint64_t *)*mbuf3;
1850 /* Get pool pointer alone */
1851 mbuf0 = (uint64_t *)*mbuf0;
1852 mbuf1 = (uint64_t *)*mbuf1;
1853 mbuf2 = (uint64_t *)*mbuf2;
1854 mbuf3 = (uint64_t *)*mbuf3;
1857 const uint8x16_t shuf_mask2 = {
1858 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1859 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1861 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1862 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1864 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1865 const uint64x2_t and_mask0 = {
1870 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1871 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1872 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1873 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1876 * Pick only 16 bits of pktlen preset at bits 63:32
1877 * and place them at bits 15:0.
1879 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1880 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1882 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1883 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1884 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1886 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1887 * pktlen at 15:0 position.
1889 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1890 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1891 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1892 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1894 /* Move mbuf to point to pool_id. */
1895 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1896 offsetof(struct rte_mempool, pool_id));
1897 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1898 offsetof(struct rte_mempool, pool_id));
1899 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1900 offsetof(struct rte_mempool, pool_id));
1901 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1902 offsetof(struct rte_mempool, pool_id));
1904 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1905 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1907 * Lookup table to translate ol_flags to
1908 * il3/il4 types. But we still use ol3/ol4 types in
1909 * senddesc_w1 as only one header processing is enabled.
1911 const uint8x16_t tbl = {
1912 /* [0-15] = il4type:il3type */
1913 0x04, /* none (IPv6 assumed) */
1914 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1915 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1916 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1917 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1918 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1919 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1920 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1921 0x02, /* RTE_MBUF_F_TX_IPV4 */
1922 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1923 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1924 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1925 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1926 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1927 * RTE_MBUF_F_TX_TCP_CKSUM
1929 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1930 * RTE_MBUF_F_TX_SCTP_CKSUM
1932 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1933 * RTE_MBUF_F_TX_UDP_CKSUM
1937 /* Extract olflags to translate to iltypes */
1938 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1939 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1942 * E(47):L3_LEN(9):L2_LEN(7+z)
1943 * E(47):L3_LEN(9):L2_LEN(7+z)
1945 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1946 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1948 /* Move OLFLAGS bits 55:52 to 51:48
1949 * with zeros preprended on the byte and rest
1952 xtmp128 = vshrq_n_u8(xtmp128, 4);
1953 ytmp128 = vshrq_n_u8(ytmp128, 4);
1955 * E(48):L3_LEN(8):L2_LEN(z+7)
1956 * E(48):L3_LEN(8):L2_LEN(z+7)
1958 const int8x16_t tshft3 = {
1959 -1, 0, 8, 8, 8, 8, 8, 8,
1960 -1, 0, 8, 8, 8, 8, 8, 8,
1963 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1964 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1967 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1968 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1970 /* Pick only relevant fields i.e Bit 48:55 of iltype
1971 * and place it in ol3/ol4type of senddesc_w1
1973 const uint8x16_t shuf_mask0 = {
1974 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1975 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1978 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1979 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1981 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1982 * a [E(32):E(16):OL3(8):OL2(8)]
1984 * a [E(32):E(16):(OL3+OL2):OL2]
1985 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1987 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1988 vshlq_n_u16(senddesc01_w1, 8));
1989 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1990 vshlq_n_u16(senddesc23_w1, 8));
1992 /* Move ltypes to senddesc*_w1 */
1993 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1994 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1995 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1996 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1998 * Lookup table to translate ol_flags to
2002 const uint8x16_t tbl = {
2003 /* [0-15] = ol4type:ol3type */
2005 0x03, /* OUTER_IP_CKSUM */
2006 0x02, /* OUTER_IPV4 */
2007 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2008 0x04, /* OUTER_IPV6 */
2009 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2010 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2011 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2014 0x00, /* OUTER_UDP_CKSUM */
2015 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
2016 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
2017 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
2020 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
2021 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2024 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2027 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2028 * OUTER_IPV4 | OUTER_IP_CKSUM
2032 /* Extract olflags to translate to iltypes */
2033 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2034 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2037 * E(47):OL3_LEN(9):OL2_LEN(7+z)
2038 * E(47):OL3_LEN(9):OL2_LEN(7+z)
2040 const uint8x16_t shuf_mask5 = {
2041 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2042 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2044 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2045 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2047 /* Extract outer ol flags only */
2048 const uint64x2_t o_cksum_mask = {
2053 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
2054 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
2056 /* Extract OUTER_UDP_CKSUM bit 41 and
2060 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2061 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2063 /* Shift oltype by 2 to start nibble from BIT(56)
2064 * instead of BIT(58)
2066 xtmp128 = vshrq_n_u8(xtmp128, 2);
2067 ytmp128 = vshrq_n_u8(ytmp128, 2);
2069 * E(48):L3_LEN(8):L2_LEN(z+7)
2070 * E(48):L3_LEN(8):L2_LEN(z+7)
2072 const int8x16_t tshft3 = {
2073 -1, 0, 8, 8, 8, 8, 8, 8,
2074 -1, 0, 8, 8, 8, 8, 8, 8,
2077 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2078 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2081 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2082 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2084 /* Pick only relevant fields i.e Bit 56:63 of oltype
2085 * and place it in ol3/ol4type of senddesc_w1
2087 const uint8x16_t shuf_mask0 = {
2088 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2089 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2092 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2093 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2095 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
2096 * a [E(32):E(16):OL3(8):OL2(8)]
2098 * a [E(32):E(16):(OL3+OL2):OL2]
2099 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
2101 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2102 vshlq_n_u16(senddesc01_w1, 8));
2103 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2104 vshlq_n_u16(senddesc23_w1, 8));
2106 /* Move ltypes to senddesc*_w1 */
2107 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2108 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2109 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2110 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2111 /* Lookup table to translate ol_flags to
2112 * ol4type, ol3type, il4type, il3type of senddesc_w1
2114 const uint8x16x2_t tbl = {{
2116 /* [0-15] = il4type:il3type */
2117 0x04, /* none (IPv6) */
2118 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
2119 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
2120 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
2121 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
2122 0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
2123 * RTE_MBUF_F_TX_TCP_CKSUM
2125 0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2126 * RTE_MBUF_F_TX_SCTP_CKSUM
2128 0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2129 * RTE_MBUF_F_TX_UDP_CKSUM
2131 0x02, /* RTE_MBUF_F_TX_IPV4 */
2132 0x12, /* RTE_MBUF_F_TX_IPV4 |
2133 * RTE_MBUF_F_TX_TCP_CKSUM
2135 0x22, /* RTE_MBUF_F_TX_IPV4 |
2136 * RTE_MBUF_F_TX_SCTP_CKSUM
2138 0x32, /* RTE_MBUF_F_TX_IPV4 |
2139 * RTE_MBUF_F_TX_UDP_CKSUM
2141 0x03, /* RTE_MBUF_F_TX_IPV4 |
2142 * RTE_MBUF_F_TX_IP_CKSUM
2144 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2145 * RTE_MBUF_F_TX_TCP_CKSUM
2147 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2148 * RTE_MBUF_F_TX_SCTP_CKSUM
2150 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2151 * RTE_MBUF_F_TX_UDP_CKSUM
2156 /* [16-31] = ol4type:ol3type */
2158 0x03, /* OUTER_IP_CKSUM */
2159 0x02, /* OUTER_IPV4 */
2160 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2161 0x04, /* OUTER_IPV6 */
2162 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2163 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2164 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2167 0x00, /* OUTER_UDP_CKSUM */
2168 0x33, /* OUTER_UDP_CKSUM |
2171 0x32, /* OUTER_UDP_CKSUM |
2174 0x33, /* OUTER_UDP_CKSUM |
2175 * OUTER_IPV4 | OUTER_IP_CKSUM
2177 0x34, /* OUTER_UDP_CKSUM |
2180 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2183 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2186 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2187 * OUTER_IPV4 | OUTER_IP_CKSUM
2192 /* Extract olflags to translate to oltype & iltype */
2193 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2194 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2197 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2198 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2200 const uint32x4_t tshft_4 = {
2206 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2207 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2210 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2211 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2213 const uint8x16_t shuf_mask5 = {
2214 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2215 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2217 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2218 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2220 /* Extract outer and inner header ol_flags */
2221 const uint64x2_t oi_cksum_mask = {
2226 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2227 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2229 /* Extract OUTER_UDP_CKSUM bit 41 and
2233 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2234 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2236 /* Shift right oltype by 2 and iltype by 4
2237 * to start oltype nibble from BIT(58)
2238 * instead of BIT(56) and iltype nibble from BIT(48)
2239 * instead of BIT(52).
2241 const int8x16_t tshft5 = {
2242 8, 8, 8, 8, 8, 8, -4, -2,
2243 8, 8, 8, 8, 8, 8, -4, -2,
2246 xtmp128 = vshlq_u8(xtmp128, tshft5);
2247 ytmp128 = vshlq_u8(ytmp128, tshft5);
2249 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2250 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2252 const int8x16_t tshft3 = {
2253 -1, 0, -1, 0, 0, 0, 0, 0,
2254 -1, 0, -1, 0, 0, 0, 0, 0,
2257 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2258 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2260 /* Mark Bit(4) of oltype */
2261 const uint64x2_t oi_cksum_mask2 = {
2266 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2267 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2270 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2271 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2273 /* Pick only relevant fields i.e Bit 48:55 of iltype and
2274 * Bit 56:63 of oltype and place it in corresponding
2275 * place in senddesc_w1.
2277 const uint8x16_t shuf_mask0 = {
2278 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2279 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2282 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2283 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2285 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2286 * l3len, l2len, ol3len, ol2len.
2287 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2289 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2291 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2292 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2294 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2295 vshlq_n_u32(senddesc01_w1, 8));
2296 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2297 vshlq_n_u32(senddesc23_w1, 8));
2299 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2300 senddesc01_w1 = vaddq_u8(
2301 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2302 senddesc23_w1 = vaddq_u8(
2303 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2305 /* Move ltypes to senddesc*_w1 */
2306 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2307 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2310 xmask01 = vdupq_n_u64(0);
2312 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2317 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2322 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2327 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2331 xmask01 = vshlq_n_u64(xmask01, 20);
2332 xmask23 = vshlq_n_u64(xmask23, 20);
2334 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2335 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2337 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2338 /* Tx ol_flag for vlan. */
2339 const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2340 /* Bit enable for VLAN1 */
2341 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2342 /* Tx ol_flag for QnQ. */
2343 const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2344 /* Bit enable for VLAN0 */
2345 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2346 /* Load vlan values from packet. outer is VLAN 0 */
2347 uint64x2_t ext01 = {
2348 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2349 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2350 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2351 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2353 uint64x2_t ext23 = {
2354 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2355 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2356 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2357 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2360 /* Get ol_flags of the packets. */
2361 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2362 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2364 /* ORR vlan outer/inner values into cmd. */
2365 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2366 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2368 /* Test for offload enable bits and generate masks. */
2369 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2371 vandq_u64(vtstq_u64(xtmp128, olq),
2373 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2375 vandq_u64(vtstq_u64(ytmp128, olq),
2378 /* Set vlan enable bits into cmd based on mask. */
2379 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2380 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2383 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2384 /* Tx ol_flag for timestamp. */
2385 const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2386 RTE_MBUF_F_TX_IEEE1588_TMST};
2387 /* Set send mem alg to SUB. */
2388 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2389 /* Increment send mem address by 8. */
2390 const uint64x2_t addr = {0x8, 0x8};
2392 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2393 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2395 /* Check if timestamp is requested and generate inverted
2396 * mask as we need not make any changes to default cmd
2399 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2400 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2402 /* Change send mem address to an 8 byte offset when
2403 * TSTMP is disabled.
2405 sendmem01_w1 = vaddq_u64(sendmem01_w1,
2406 vandq_u64(xtmp128, addr));
2407 sendmem23_w1 = vaddq_u64(sendmem23_w1,
2408 vandq_u64(ytmp128, addr));
2409 /* Change send mem alg to SUB when TSTMP is disabled. */
2410 sendmem01_w0 = vorrq_u64(sendmem01_w0,
2411 vandq_u64(xtmp128, alg));
2412 sendmem23_w0 = vorrq_u64(sendmem23_w0,
2413 vandq_u64(ytmp128, alg));
2415 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2416 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2417 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2418 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2421 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2422 const uint64_t lso_fmt = txq->lso_tun_fmt;
2423 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2424 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2426 /* Extract SD W1 as we need to set L4 types. */
2427 vst1q_u64(sd_w1, senddesc01_w1);
2428 vst1q_u64(sd_w1 + 2, senddesc23_w1);
2430 /* Extract SX W0 as we need to set LSO fields. */
2431 vst1q_u64(sx_w0, sendext01_w0);
2432 vst1q_u64(sx_w0 + 2, sendext23_w0);
2434 /* Extract ol_flags. */
2435 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2436 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2438 /* Prepare individual mbufs. */
2439 cn10k_nix_prepare_tso(tx_pkts[0],
2440 (union nix_send_hdr_w1_u *)&sd_w1[0],
2441 (union nix_send_ext_w0_u *)&sx_w0[0],
2442 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2444 cn10k_nix_prepare_tso(tx_pkts[1],
2445 (union nix_send_hdr_w1_u *)&sd_w1[1],
2446 (union nix_send_ext_w0_u *)&sx_w0[1],
2447 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2449 cn10k_nix_prepare_tso(tx_pkts[2],
2450 (union nix_send_hdr_w1_u *)&sd_w1[2],
2451 (union nix_send_ext_w0_u *)&sx_w0[2],
2452 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2454 cn10k_nix_prepare_tso(tx_pkts[3],
2455 (union nix_send_hdr_w1_u *)&sd_w1[3],
2456 (union nix_send_ext_w0_u *)&sx_w0[3],
2457 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2459 senddesc01_w1 = vld1q_u64(sd_w1);
2460 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2462 sendext01_w0 = vld1q_u64(sx_w0);
2463 sendext23_w0 = vld1q_u64(sx_w0 + 2);
2466 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2467 !(flags & NIX_TX_MULTI_SEG_F) &&
2468 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2469 /* Set don't free bit if reference count > 1 */
2470 xmask01 = vdupq_n_u64(0);
2473 /* Move mbufs to iova */
2474 mbuf0 = (uint64_t *)tx_pkts[0];
2475 mbuf1 = (uint64_t *)tx_pkts[1];
2476 mbuf2 = (uint64_t *)tx_pkts[2];
2477 mbuf3 = (uint64_t *)tx_pkts[3];
2479 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2480 vsetq_lane_u64(0x80000, xmask01, 0);
2482 RTE_MEMPOOL_CHECK_COOKIES(
2483 ((struct rte_mbuf *)mbuf0)->pool,
2484 (void **)&mbuf0, 1, 0);
2486 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2487 vsetq_lane_u64(0x80000, xmask01, 1);
2489 RTE_MEMPOOL_CHECK_COOKIES(
2490 ((struct rte_mbuf *)mbuf1)->pool,
2491 (void **)&mbuf1, 1, 0);
2493 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2494 vsetq_lane_u64(0x80000, xmask23, 0);
2496 RTE_MEMPOOL_CHECK_COOKIES(
2497 ((struct rte_mbuf *)mbuf2)->pool,
2498 (void **)&mbuf2, 1, 0);
2500 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2501 vsetq_lane_u64(0x80000, xmask23, 1);
2503 RTE_MEMPOOL_CHECK_COOKIES(
2504 ((struct rte_mbuf *)mbuf3)->pool,
2505 (void **)&mbuf3, 1, 0);
2506 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2507 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2508 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2509 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2510 /* Move mbufs to iova */
2511 mbuf0 = (uint64_t *)tx_pkts[0];
2512 mbuf1 = (uint64_t *)tx_pkts[1];
2513 mbuf2 = (uint64_t *)tx_pkts[2];
2514 mbuf3 = (uint64_t *)tx_pkts[3];
2516 /* Mark mempool object as "put" since
2517 * it is freed by NIX
2519 RTE_MEMPOOL_CHECK_COOKIES(
2520 ((struct rte_mbuf *)mbuf0)->pool,
2521 (void **)&mbuf0, 1, 0);
2523 RTE_MEMPOOL_CHECK_COOKIES(
2524 ((struct rte_mbuf *)mbuf1)->pool,
2525 (void **)&mbuf1, 1, 0);
2527 RTE_MEMPOOL_CHECK_COOKIES(
2528 ((struct rte_mbuf *)mbuf2)->pool,
2529 (void **)&mbuf2, 1, 0);
2531 RTE_MEMPOOL_CHECK_COOKIES(
2532 ((struct rte_mbuf *)mbuf3)->pool,
2533 (void **)&mbuf3, 1, 0);
2536 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
2537 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2538 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2539 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2540 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2542 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2543 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2544 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2545 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2547 if (flags & NIX_TX_NEED_EXT_HDR) {
2548 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2549 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2550 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2551 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2554 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2555 const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2556 RTE_MBUF_F_TX_SEC_OFFLOAD};
2560 /* Extract ol_flags. */
2561 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2562 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2564 xtmp128 = vtstq_u64(olf, xtmp128);
2565 ytmp128 = vtstq_u64(olf, ytmp128);
2568 dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2569 if (vgetq_lane_u64(xtmp128, 0))
2570 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2571 &cmd1[0], &next, c_laddr,
2573 &c_shft, sa_base, flags);
2575 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2576 &shift, &wd.data128, &next);
2578 /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2579 cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2580 cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2584 dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2585 if (vgetq_lane_u64(xtmp128, 1))
2586 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2587 &cmd1[1], &next, c_laddr,
2589 &c_shft, sa_base, flags);
2591 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2592 &shift, &wd.data128, &next);
2594 /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2595 cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2596 cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2600 dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2601 if (vgetq_lane_u64(ytmp128, 0))
2602 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2603 &cmd1[2], &next, c_laddr,
2605 &c_shft, sa_base, flags);
2607 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2608 &shift, &wd.data128, &next);
2610 /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2611 cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2612 cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2616 dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2617 if (vgetq_lane_u64(ytmp128, 1))
2618 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2619 &cmd1[3], &next, c_laddr,
2621 &c_shft, sa_base, flags);
2623 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2624 &shift, &wd.data128, &next);
2626 /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2627 cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2628 cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2631 } else if (flags & NIX_TX_MULTI_SEG_F) {
2635 j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2638 LMT_OFF(laddr, lnum,
2640 &wd.data128, &shift,
2643 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2644 /* Store the prepared send desc to LMT lines */
2645 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2646 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2647 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2648 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2649 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2650 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2651 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2652 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2653 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2655 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2656 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2657 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2658 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2659 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2660 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2661 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2662 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2664 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2665 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2666 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2667 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2668 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2669 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2671 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2672 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2673 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2674 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2675 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2676 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2680 /* Store the prepared send desc to LMT lines */
2681 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2682 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2683 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2684 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2685 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2686 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2687 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2688 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2692 if (flags & NIX_TX_MULTI_SEG_F) {
2693 tx_pkts[0]->next = NULL;
2694 tx_pkts[1]->next = NULL;
2695 tx_pkts[2]->next = NULL;
2696 tx_pkts[3]->next = NULL;
2699 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2702 /* Roundup lnum to last line if it is partial */
2703 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2704 lnum = lnum + !!loff;
2705 wd.data128 = wd.data128 |
2706 (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2709 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2712 if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
2713 ws[1] = roc_sso_hws_head_wait(ws[0]);
2717 /* Submit CPT instructions if any */
2718 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2719 cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
2720 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2726 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2727 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2729 pa = io_addr | (wd.data[0] & 0x7) << 4;
2730 wd.data[0] &= ~0x7ULL;
2732 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2735 wd.data[0] |= (15ULL << 12);
2736 wd.data[0] |= (uint64_t)lmt_id;
2739 roc_lmt_submit_steorl(wd.data[0], pa);
2741 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2742 wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2744 pa = io_addr | (wd.data[1] & 0x7) << 4;
2745 wd.data[1] &= ~0x7ULL;
2747 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2750 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2751 wd.data[1] |= (uint64_t)(lmt_id + 16);
2754 roc_lmt_submit_steorl(wd.data[1], pa);
2756 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2757 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2759 pa = io_addr | (wd.data[0] & 0x7) << 4;
2760 wd.data[0] &= ~0x7ULL;
2762 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2765 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2766 wd.data[0] |= lmt_id;
2769 roc_lmt_submit_steorl(wd.data[0], pa);
2776 if (unlikely(scalar)) {
2777 if (flags & NIX_TX_MULTI_SEG_F)
2778 pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
2779 scalar, cmd, flags);
2781 pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
2782 scalar, cmd, flags);
2789 static __rte_always_inline uint16_t
2790 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
2791 struct rte_mbuf **tx_pkts, uint16_t pkts,
2792 uint64_t *cmd, const uint16_t flags)
2795 RTE_SET_USED(tx_queue);
2796 RTE_SET_USED(tx_pkts);
2799 RTE_SET_USED(flags);
2804 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
2805 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2806 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
2807 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
2808 #define TSO_F NIX_TX_OFFLOAD_TSO_F
2809 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
2810 #define T_SEC_F NIX_TX_OFFLOAD_SECURITY_F
2812 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
2813 #define NIX_TX_FASTPATH_MODES_0_15 \
2814 T(no_offload, 6, NIX_TX_OFFLOAD_NONE) \
2815 T(l3l4csum, 6, L3L4CSUM_F) \
2816 T(ol3ol4csum, 6, OL3OL4CSUM_F) \
2817 T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F) \
2818 T(vlan, 6, VLAN_F) \
2819 T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F) \
2820 T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F) \
2821 T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2822 T(noff, 6, NOFF_F) \
2823 T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F) \
2824 T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F) \
2825 T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2826 T(noff_vlan, 6, NOFF_F | VLAN_F) \
2827 T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F) \
2828 T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2829 T(noff_vlan_ol3ol4csum_l3l4csum, 6, \
2830 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2832 #define NIX_TX_FASTPATH_MODES_16_31 \
2834 T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F) \
2835 T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F) \
2836 T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2837 T(tso_vlan, 6, TSO_F | VLAN_F) \
2838 T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F) \
2839 T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F) \
2840 T(tso_vlan_ol3ol4csum_l3l4csum, 6, \
2841 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2842 T(tso_noff, 6, TSO_F | NOFF_F) \
2843 T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F) \
2844 T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F) \
2845 T(tso_noff_ol3ol4csum_l3l4csum, 6, \
2846 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2847 T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F) \
2848 T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2849 T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2850 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
2851 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2853 #define NIX_TX_FASTPATH_MODES_32_47 \
2855 T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F) \
2856 T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F) \
2857 T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2858 T(ts_vlan, 8, TSP_F | VLAN_F) \
2859 T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F) \
2860 T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F) \
2861 T(ts_vlan_ol3ol4csum_l3l4csum, 8, \
2862 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2863 T(ts_noff, 8, TSP_F | NOFF_F) \
2864 T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F) \
2865 T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F) \
2866 T(ts_noff_ol3ol4csum_l3l4csum, 8, \
2867 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2868 T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F) \
2869 T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2870 T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2871 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
2872 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2874 #define NIX_TX_FASTPATH_MODES_48_63 \
2875 T(ts_tso, 8, TSP_F | TSO_F) \
2876 T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F) \
2877 T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F) \
2878 T(ts_tso_ol3ol4csum_l3l4csum, 8, \
2879 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2880 T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F) \
2881 T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2882 T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2883 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
2884 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2885 T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F) \
2886 T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2887 T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2888 T(ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
2889 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2890 T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F) \
2891 T(ts_tso_noff_vlan_l3l4csum, 8, \
2892 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2893 T(ts_tso_noff_vlan_ol3ol4csum, 8, \
2894 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2895 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
2896 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2898 #define NIX_TX_FASTPATH_MODES_64_79 \
2899 T(sec, 6, T_SEC_F) \
2900 T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F) \
2901 T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F) \
2902 T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2903 T(sec_vlan, 6, T_SEC_F | VLAN_F) \
2904 T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F) \
2905 T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F) \
2906 T(sec_vlan_ol3ol4csum_l3l4csum, 6, \
2907 T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2908 T(sec_noff, 6, T_SEC_F | NOFF_F) \
2909 T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F) \
2910 T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F) \
2911 T(sec_noff_ol3ol4csum_l3l4csum, 6, \
2912 T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2913 T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F) \
2914 T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2915 T(sec_noff_vlan_ol3ol4csum, 6, \
2916 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2917 T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6, \
2918 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2920 #define NIX_TX_FASTPATH_MODES_80_95 \
2921 T(sec_tso, 6, T_SEC_F | TSO_F) \
2922 T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F) \
2923 T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F) \
2924 T(sec_tso_ol3ol4csum_l3l4csum, 6, \
2925 T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2926 T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F) \
2927 T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2928 T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2929 T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6, \
2930 T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2931 T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F) \
2932 T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2933 T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2934 T(sec_tso_noff_ol3ol4csum_l3l4csum, 6, \
2935 T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2936 T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F) \
2937 T(sec_tso_noff_vlan_l3l4csum, 6, \
2938 T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2939 T(sec_tso_noff_vlan_ol3ol4csum, 6, \
2940 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2941 T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
2942 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2944 #define NIX_TX_FASTPATH_MODES_96_111 \
2945 T(sec_ts, 8, T_SEC_F | TSP_F) \
2946 T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F) \
2947 T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F) \
2948 T(sec_ts_ol3ol4csum_l3l4csum, 8, \
2949 T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2950 T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F) \
2951 T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F) \
2952 T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \
2953 T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8, \
2954 T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2955 T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F) \
2956 T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F) \
2957 T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \
2958 T(sec_ts_noff_ol3ol4csum_l3l4csum, 8, \
2959 T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2960 T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F) \
2961 T(sec_ts_noff_vlan_l3l4csum, 8, \
2962 T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2963 T(sec_ts_noff_vlan_ol3ol4csum, 8, \
2964 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2965 T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
2966 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2968 #define NIX_TX_FASTPATH_MODES_112_127 \
2969 T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F) \
2970 T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F) \
2971 T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F) \
2972 T(sec_ts_tso_ol3ol4csum_l3l4csum, 8, \
2973 T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2974 T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F) \
2975 T(sec_ts_tso_vlan_l3l4csum, 8, \
2976 T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2977 T(sec_ts_tso_vlan_ol3ol4csum, 8, \
2978 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2979 T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
2980 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2981 T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F) \
2982 T(sec_ts_tso_noff_l3l4csum, 8, \
2983 T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2984 T(sec_ts_tso_noff_ol3ol4csum, 8, \
2985 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2986 T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
2987 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2988 T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \
2989 T(sec_ts_tso_noff_vlan_l3l4csum, 8, \
2990 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2991 T(sec_ts_tso_noff_vlan_ol3ol4csum, 8, \
2992 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2993 T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
2994 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
2997 #define NIX_TX_FASTPATH_MODES \
2998 NIX_TX_FASTPATH_MODES_0_15 \
2999 NIX_TX_FASTPATH_MODES_16_31 \
3000 NIX_TX_FASTPATH_MODES_32_47 \
3001 NIX_TX_FASTPATH_MODES_48_63 \
3002 NIX_TX_FASTPATH_MODES_64_79 \
3003 NIX_TX_FASTPATH_MODES_80_95 \
3004 NIX_TX_FASTPATH_MODES_96_111 \
3005 NIX_TX_FASTPATH_MODES_112_127
3007 #define T(name, sz, flags) \
3008 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \
3009 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
3010 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name( \
3011 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
3012 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
3013 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
3014 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
3015 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
3017 NIX_TX_FASTPATH_MODES
3020 #define NIX_TX_XMIT(fn, sz, flags) \
3021 uint16_t __rte_noinline __rte_hot fn( \
3022 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
3025 /* For TSO inner checksum is a must */ \
3026 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
3027 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
3029 return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
3033 #define NIX_TX_XMIT_MSEG(fn, sz, flags) \
3034 uint16_t __rte_noinline __rte_hot fn( \
3035 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
3037 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
3038 /* For TSO inner checksum is a must */ \
3039 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
3040 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
3042 return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
3044 flags | NIX_TX_MULTI_SEG_F); \
3047 #define NIX_TX_XMIT_VEC(fn, sz, flags) \
3048 uint16_t __rte_noinline __rte_hot fn( \
3049 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
3052 /* For TSO inner checksum is a must */ \
3053 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
3054 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
3056 return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts, \
3057 pkts, cmd, (flags)); \
3060 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags) \
3061 uint16_t __rte_noinline __rte_hot fn( \
3062 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
3064 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
3065 /* For TSO inner checksum is a must */ \
3066 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
3067 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
3069 return cn10k_nix_xmit_pkts_vector( \
3070 tx_queue, NULL, tx_pkts, pkts, cmd, \
3071 (flags) | NIX_TX_MULTI_SEG_F); \
3074 #endif /* __CN10K_TX_H__ */