1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #include <rte_eventdev.h>
/* Per-Tx-queue offload flag bits; each bit selects extra work in the
 * specialized xmit functions (checksum, VLAN insert, TSO, timestamp,
 * inline security). NOTE(review): this listing has elided lines — the
 * closing of the comment below (original line 23) is missing here.
 */
11 #define NIX_TX_OFFLOAD_NONE (0)
12 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
13 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
14 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
15 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
16 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
17 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
18 #define NIX_TX_OFFLOAD_SECURITY_F BIT(6)
20 /* Flags to control xmit_prepare function.
21 * Defining it from backwards to denote its been
22 * not used as offload flags to pick function
24 #define NIX_TX_VWQE_F BIT(14)
25 #define NIX_TX_MULTI_SEG_F BIT(15)
/* Offload-flag combinations that require filling send header word 1,
 * or require the NIX send extension sub-descriptor.
 * NOTE(review): lines are elided in this listing — NIX_TX_NEED_EXT_HDR's
 * last operand (original line 33) and parts of the flow-control macro
 * (do/while wrapper, return statement) are missing from view.
 */
27 #define NIX_TX_NEED_SEND_HDR_W1 \
28 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
29 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
31 #define NIX_TX_NEED_EXT_HDR \
32 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
/* Flow-control check: refresh the cached free-descriptor count from
 * fc_mem/nb_sqb_bufs_adj (scaled by SQEs-per-SQB) and bail out of the
 * caller if there is still not enough room for 'pkts'.
 */
35 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
37 /* Cached value is low, Update the fc_cache_pkts */ \
38 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
39 /* Multiply with sqe_per_sqb to express in pkts */ \
40 (txq)->fc_cache_pkts = \
41 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
42 << (txq)->sqes_per_sqb_log2; \
43 /* Check it again for the room */ \
44 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
/* Lookup table packed in a constant: 4-bit dword count per nb_segs value. */
49 /* Encoded number of segments to number of dwords macro, each value of nb_segs
50 * is encoded as 4bits.
52 #define NIX_SEGDW_MAGIC 0x76654432210ULL
54 #define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
56 /* Function to determine no of tx subdesc required in case ext
57 * sub desc is enabled.
/* Number of extension sub-descriptors needed for the given offload flags.
 * NOTE(review): the return values are on elided lines; from the visible
 * condition, TSTAMP gets the larger count and VLAN/TSO a smaller one —
 * confirm against the original source.
 */
59 static __rte_always_inline int
60 cn10k_nix_tx_ext_subs(const uint16_t flags)
62 return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
65 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
70 static __rte_always_inline uint8_t
71 cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
73 if (!(flags & NIX_TX_MULTI_SEG_F))
74 return cn10k_nix_tx_ext_subs(flags) + 2;
76 /* Already everything is accounted for in segdw */
/* Packets per vector burst: 2 per LMT line when an ext header is needed
 * (larger descriptors), else 4; scaled by LMT lines per core.
 */
80 static __rte_always_inline uint8_t
81 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
83 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
84 << ROC_LMT_LINES_PER_CORE_LOG2;
/* Dwords consumed per LMT line for the vector path: 8 with timestamp,
 * 6 with only ext header; the no-ext-header value is on an elided line.
 */
87 static __rte_always_inline uint8_t
88 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
90 return (flags & NIX_TX_NEED_EXT_HDR) ?
91 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
/* Build the STEOR (LMT store-and-release) data word for the scalar path:
 * per-line size fields derived from dwords-minus-1. Most of the packing
 * is on elided lines — see the original for the full bit layout.
 */
95 static __rte_always_inline uint64_t
96 cn10k_nix_tx_steor_data(const uint16_t flags)
98 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
101 /* This will be moved to addr area */
103 /* 15 vector sizes for single seg */
/* Dwords per LMT line for the multi-seg vector path; the no-ext-header
 * arm of the ternary is on an elided line.
 */
123 static __rte_always_inline uint8_t
124 cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
126 return ((flags & NIX_TX_NEED_EXT_HDR) ?
127 (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
/* STEOR data word for the vector path, sized from dwords-per-line - 1.
 * Bit packing continues on elided lines.
 */
131 static __rte_always_inline uint64_t
132 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
134 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
137 /* This will be moved to addr area */
139 /* 15 vector sizes for single seg */
/* STEOR data word for CPT (crypto) LMT submissions; fixed size since two
 * CPT instructions fit per LMT line. Packing continues on elided lines.
 */
159 static __rte_always_inline uint64_t
160 cn10k_cpt_tx_steor_data(void)
162 /* We have two CPT instructions per LMTLine */
163 const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
166 /* This will be moved to addr area */
/* Copy the per-queue command skeleton (send hdr word0, and the ext header
 * template from txq->cmd when needed) into the caller's cmd[] buffer.
 */
187 static __rte_always_inline void
188 cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
189 const uint16_t flags)
192 cmd[0] = txq->send_hdr_w0;
196 /* Send ext if present */
197 if (flags & NIX_TX_NEED_EXT_HDR) {
198 *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
/* Issue the LMT STEOR release for queued CPT (security) instructions:
 * patch the per-line size field for a half-filled last line (one CPT
 * inst = 64B), fold line 0's size into the address bits, set lines-1,
 * then submit. Several declarations/branches are on elided lines.
 */
207 static __rte_always_inline void
208 cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
209 uint8_t loff, uint8_t shft)
214 /* Check if there is any CPT instruction to submit */
218 data = cn10k_cpt_tx_steor_data();
219 /* Update lmtline use for partial end line */
221 data &= ~(0x7ULL << shft);
222 /* Update it to half full i.e 64B */
223 data |= (0x3UL << shft);
226 pa = io_addr | ((data >> 16) & 0x7) << 4;
227 data &= ~(0x7ULL << 16);
228 /* Update lines - 1 that contain valid data */
229 data |= ((uint64_t)(lnum + loff - 1)) << 12;
233 roc_lmt_submit_steorl(data, pa);
236 #if defined(RTE_ARCH_ARM64)
/* Prepare an outbound inline-IPsec CPT instruction for one mbuf in the
 * NEON vector Tx path: read session state from the security dynfield,
 * round the payload up per SA (roundup_byte/partial_len), patch the NIX
 * send descriptors' lengths, carve the NIXTX area past the packet data,
 * build CPT words 0-3 + ucode command, and store the instruction to the
 * CPT LMT line (two instructions per line, tracked via lnum/loff/shft).
 * NOTE(review): several declarations (l2_len, tag, sa, laddr) and some
 * statements are on elided lines in this listing.
 */
237 static __rte_always_inline void
238 cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
239 uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
240 uint8_t *loff, uint8_t *shft, uint64_t sa_base,
241 const uint16_t flags)
243 struct cn10k_sec_sess_priv sess_priv;
244 uint32_t pkt_len, dlen_adj, rlen;
245 uint64x2_t cmd01, cmd23;
246 uintptr_t dptr, nixtx;
247 uint64_t ucode_cmd[4];
253 sess_priv.u64 = *rte_security_dynfield(m);
255 if (flags & NIX_TX_NEED_SEND_HDR_W1)
256 l2_len = vgetq_lane_u8(*cmd0, 8);
261 dptr = vgetq_lane_u64(*cmd1, 1);
262 pkt_len = vgetq_lane_u16(*cmd0, 0);
264 /* Calculate dlen adj */
265 dlen_adj = pkt_len - l2_len;
266 rlen = (dlen_adj + sess_priv.roundup_len) +
267 (sess_priv.roundup_byte - 1);
268 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
269 rlen += sess_priv.partial_len;
270 dlen_adj = rlen - dlen_adj;
272 /* Update send descriptors. Security is single segment only */
273 *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
274 *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
276 /* Get area where NIX descriptor needs to be stored */
277 nixtx = dptr + pkt_len + dlen_adj;
279 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
281 /* Return nixtx addr */
282 *nixtx_addr = (nixtx + 16);
284 /* DLEN passed is excluding L2HDR */
286 tag = sa_base & 0xFFFFUL;
287 sa_base &= ~0xFFFFUL;
288 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
289 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
291 (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);
293 /* CPT Word 0 and Word 1 */
294 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
295 /* CPT_RES_S is 16B above NIXTX */
296 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
298 /* CPT word 2 and 3 */
299 cmd23 = vdupq_n_u64(0);
300 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
301 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
302 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
308 /* Move to our line */
309 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
311 /* Write CPT instruction to lmt line */
312 vst1q_u64(laddr, cmd01);
313 vst1q_u64((laddr + 2), cmd23);
315 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
316 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
318 /* Move to next line for every other CPT inst */
320 *lnum = *lnum + (*loff ? 0 : 1);
321 *shft = *shft + (*loff ? 0 : 3);
/* Scalar-path counterpart of cn10k_nix_prep_sec_vec: same SA-based
 * length roundup, descriptor patching, NIXTX carve-out and CPT
 * instruction build, but operating on the plain cmd[] array (send hdr
 * struct + sg union) instead of NEON lanes. Single-segment only, per
 * the comment at original line 366. Some declarations and statements
 * (l2_len, tag, sa, laddr, ucode_cmd[1..2]) are on elided lines.
 */
324 static __rte_always_inline void
325 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
326 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
327 uint64_t sa_base, const uint16_t flags)
329 struct cn10k_sec_sess_priv sess_priv;
330 uint32_t pkt_len, dlen_adj, rlen;
331 struct nix_send_hdr_s *send_hdr;
332 uint64x2_t cmd01, cmd23;
333 union nix_send_sg_s *sg;
334 uintptr_t dptr, nixtx;
335 uint64_t ucode_cmd[4];
341 /* Move to our line from base */
342 sess_priv.u64 = *rte_security_dynfield(m);
343 send_hdr = (struct nix_send_hdr_s *)cmd;
344 if (flags & NIX_TX_NEED_EXT_HDR)
345 sg = (union nix_send_sg_s *)&cmd[4];
347 sg = (union nix_send_sg_s *)&cmd[2];
349 if (flags & NIX_TX_NEED_SEND_HDR_W1)
350 l2_len = cmd[1] & 0xFF;
355 dptr = *(uint64_t *)(sg + 1);
356 pkt_len = send_hdr->w0.total;
358 /* Calculate dlen adj */
359 dlen_adj = pkt_len - l2_len;
360 rlen = (dlen_adj + sess_priv.roundup_len) +
361 (sess_priv.roundup_byte - 1);
362 rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
363 rlen += sess_priv.partial_len;
364 dlen_adj = rlen - dlen_adj;
366 /* Update send descriptors. Security is single segment only */
367 send_hdr->w0.total = pkt_len + dlen_adj;
368 sg->seg1_size = pkt_len + dlen_adj;
370 /* Get area where NIX descriptor needs to be stored */
371 nixtx = dptr + pkt_len + dlen_adj;
373 nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
375 /* Return nixtx addr */
376 *nixtx_addr = (nixtx + 16);
378 /* DLEN passed is excluding L2HDR */
380 tag = sa_base & 0xFFFFUL;
381 sa_base &= ~0xFFFFUL;
382 sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
383 ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
385 (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);
387 /* CPT Word 0 and Word 1. Assume no multi-seg support */
388 cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
389 /* CPT_RES_S is 16B above NIXTX */
390 cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
392 /* CPT word 2 and 3 */
393 cmd23 = vdupq_n_u64(0);
394 cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
395 CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
396 cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);
402 /* Move to our line */
403 laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
405 /* Write CPT instruction to lmt line */
406 vst1q_u64(laddr, cmd01);
407 vst1q_u64((laddr + 2), cmd23);
409 *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
410 *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);
412 /* Move to next line for every other CPT inst */
414 *lnum = *lnum + (*loff ? 0 : 1);
415 *shft = *shft + (*loff ? 0 : 3);
/* Non-ARM64 stub of cn10k_nix_prep_sec: marks parameters unused so the
 * build stays clean when the NEON implementation is compiled out.
 * (Remaining RTE_SET_USED lines are elided in this listing.)
 */
420 static __rte_always_inline void
421 cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
422 uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
423 uint64_t sa_base, const uint16_t flags)
427 RTE_SET_USED(nixtx_addr);
432 RTE_SET_USED(sa_base);
/* TSO pre-processing done in the mbuf data itself: subtract the payload
 * length from the (outer) IP total-length field(s) and the outer UDP
 * length so the HW LSO engine emits correct per-segment headers.
 * Tunnel case additionally fixes the outer headers when
 * OL3/OL4 csum offload is active. Some closing braces / constants are
 * on elided lines.
 */
437 static __rte_always_inline void
438 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
440 uint64_t mask, ol_flags = m->ol_flags;
442 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
443 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
444 uint16_t *iplen, *oiplen, *oudplen;
445 uint16_t lso_sb, paylen;
447 mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
448 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
449 m->l2_len + m->l3_len + m->l4_len;
451 /* Reduce payload len from base headers */
452 paylen = m->pkt_len - lso_sb;
454 /* Get iplen position assuming no tunnel hdr */
455 iplen = (uint16_t *)(mdata + m->l2_len +
456 (2 << !!(ol_flags & PKT_TX_IPV6)));
457 /* Handle tunnel tso */
458 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
459 (ol_flags & PKT_TX_TUNNEL_MASK)) {
460 const uint8_t is_udp_tun =
461 (CNXK_NIX_UDP_TUN_BITMASK >>
462 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
465 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
467 PKT_TX_OUTER_IPV6)));
468 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
471 /* Update format for UDP tunneled packet */
473 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
474 m->outer_l3_len + 4);
475 *oudplen = rte_cpu_to_be_16(
476 rte_be_to_cpu_16(*oudplen) - paylen);
479 /* Update iplen position to inner ip hdr */
480 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
482 (2 << !!(ol_flags & PKT_TX_IPV6)));
485 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
/* Fill the NIX send descriptor (cmd[]) for one mbuf: send header word0
 * (total len, aura), word1 (L3/L4 and outer L3/L4 checksum types and
 * pointers, built per the offload-flag combination), optional ext header
 * (VLAN/QinQ insert, LSO fields), and the single-seg SG entry. Also
 * reports via *sec whether the packet needs the inline-security path.
 * NOTE(review): the w1 shift trick at original lines 563-564 compacts
 * inner fields into outer fields when no outer header exists; 'mask'
 * there is set on an elided line — verify against original source.
 */
489 static __rte_always_inline void
490 cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
491 const uint64_t lso_tun_fmt, bool *sec)
493 struct nix_send_ext_s *send_hdr_ext;
494 struct nix_send_hdr_s *send_hdr;
495 uint64_t ol_flags = 0, mask;
496 union nix_send_hdr_w1_u w1;
497 union nix_send_sg_s *sg;
499 send_hdr = (struct nix_send_hdr_s *)cmd;
500 if (flags & NIX_TX_NEED_EXT_HDR) {
501 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
502 sg = (union nix_send_sg_s *)(cmd + 4);
503 /* Clear previous markings */
504 send_hdr_ext->w0.lso = 0;
505 send_hdr_ext->w1.u = 0;
507 sg = (union nix_send_sg_s *)(cmd + 2);
510 if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
511 ol_flags = m->ol_flags;
515 if (!(flags & NIX_TX_MULTI_SEG_F))
516 send_hdr->w0.total = m->data_len;
518 send_hdr->w0.total = m->pkt_len;
519 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
523 * 3 => IPV4 with csum
525 * L3type and L3ptr needs to be set for either
526 * L3 csum or L4 csum or LSO
/* Case 1: both inner and outer checksum offloads enabled. */
530 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
531 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
532 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
533 const uint8_t ol3type =
534 ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
535 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
536 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
539 w1.ol3type = ol3type;
540 mask = 0xffffull << ((!!ol3type) << 4);
541 w1.ol3ptr = ~mask & m->outer_l2_len;
542 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
545 w1.ol4type = csum + (csum << 1);
548 w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
549 ((!!(ol_flags & PKT_TX_IPV6)) << 2);
550 w1.il3ptr = w1.ol4ptr + m->l2_len;
551 w1.il4ptr = w1.il3ptr + m->l3_len;
552 /* Increment it by 1 if it is IPV4 as 3 is with csum */
553 w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);
556 w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
558 /* In case of no tunnel header use only
559 * shift IL3/IL4 fields a bit to use
560 * OL3/OL4 for header checksum
563 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
564 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
/* Case 2: only outer checksum offload enabled. */
566 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
567 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
568 const uint8_t outer_l2_len = m->outer_l2_len;
571 w1.ol3ptr = outer_l2_len;
572 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
573 /* Increment it by 1 if it is IPV4 as 3 is with csum */
574 w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
575 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
576 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
579 w1.ol4type = csum + (csum << 1);
/* Case 3: only inner checksum offload — inner fields are carried in the
 * OLx slots since only one header level exists.
 */
581 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
582 const uint8_t l2_len = m->l2_len;
584 /* Always use OLXPTR and OLXTYPE when only
585 * when one header is present
590 w1.ol4ptr = l2_len + m->l3_len;
591 /* Increment it by 1 if it is IPV4 as 3 is with csum */
592 w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
593 ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
594 !!(ol_flags & PKT_TX_IP_CKSUM);
597 w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
600 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
601 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
602 /* HW will update ptr after vlan0 update */
603 send_hdr_ext->w1.vlan1_ins_ptr = 12;
604 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
606 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
607 /* 2B before end of l2 header */
608 send_hdr_ext->w1.vlan0_ins_ptr = 12;
609 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
/* LSO setup: lso_sb is the byte offset where segmentation begins (end of
 * L4 header, computed from outer or inner pointers depending on tunnel).
 */
612 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
616 mask = -(!w1.il3type);
617 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
619 send_hdr_ext->w0.lso_sb = lso_sb;
620 send_hdr_ext->w0.lso = 1;
621 send_hdr_ext->w0.lso_mps = m->tso_segsz;
622 send_hdr_ext->w0.lso_format =
623 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
624 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
626 /* Handle tunnel tso */
627 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
628 (ol_flags & PKT_TX_TUNNEL_MASK)) {
629 const uint8_t is_udp_tun =
630 (CNXK_NIX_UDP_TUN_BITMASK >>
631 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
633 uint8_t shift = is_udp_tun ? 32 : 0;
635 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
636 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
638 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
639 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
640 /* Update format for UDP tunneled packet */
641 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
645 if (flags & NIX_TX_NEED_SEND_HDR_W1)
646 send_hdr->w1.u = w1.u;
648 if (!(flags & NIX_TX_MULTI_SEG_F)) {
649 sg->seg1_size = send_hdr->w0.total;
650 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
652 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
653 /* DF bit = 1 if refcount of current mbuf or parent mbuf
655 * DF bit = 0 otherwise
657 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
659 /* Mark mempool object as "put" since it is freed by NIX */
660 if (!send_hdr->w0.df)
661 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
663 sg->seg1_size = m->data_len;
664 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
666 /* NOFF is handled later for multi-seg */
669 if (flags & NIX_TX_OFFLOAD_SECURITY_F)
670 *sec = !!(ol_flags & PKT_TX_SEC_OFFLOAD);
/* Copy the prepared command (send hdr, optional ext hdr, sg template +
 * iova) from the local cmd[] into the LMT line / NIXTX destination.
 * NOTE(review): the lmt_addr offset advances between the struct copies
 * appear to be on elided lines; the visible stores all target lmt_addr —
 * verify offsets against the original source.
 */
673 static __rte_always_inline void
674 cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
675 const uint16_t flags)
677 struct nix_send_ext_s *send_hdr_ext;
678 union nix_send_sg_s *sg;
680 /* With minimal offloads, 'cmd' being local could be optimized out to
681 * registers. In other cases, 'cmd' will be in stack. Intent is
682 * 'cmd' stores content from txq->cmd which is copied only once.
684 *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
686 if (flags & NIX_TX_NEED_EXT_HDR) {
687 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
688 *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
691 sg = (union nix_send_sg_s *)(cmd + 4);
693 sg = (union nix_send_sg_s *)(cmd + 2);
695 /* In case of multi-seg, sg template is stored here */
696 *((union nix_send_sg_s *)lmt_addr) = *sg;
697 *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
/* Append the SEND_MEM sub-descriptor used for Tx timestamping: enables
 * the ext-header tstmp bit and points SEND_MEM at the tstamp memory.
 * For packets without PKT_TX_IEEE1588_TMST, the alg is downgraded
 * (SETTSTMP - 1) and the address shifted 8B so the real timestamp slot
 * is not clobbered (per the comment at original lines 726-729).
 */
700 static __rte_always_inline void
701 cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
702 const uint64_t ol_flags, const uint16_t no_segdw,
703 const uint16_t flags)
705 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
706 const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
707 struct nix_send_ext_s *send_hdr_ext =
708 (struct nix_send_ext_s *)lmt_addr + 16;
709 uint64_t *lmt = (uint64_t *)lmt_addr;
710 uint16_t off = (no_segdw - 1) << 1;
711 struct nix_send_mem_s *send_mem;
713 send_mem = (struct nix_send_mem_s *)(lmt + off);
714 send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
715 send_hdr_ext->w0.tstmp = 1;
716 if (flags & NIX_TX_MULTI_SEG_F) {
717 /* Retrieving the default desc values */
720 /* Using compiler barier to avoid voilation of C
723 rte_compiler_barrier();
726 /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
727 * should not be recorded, hence changing the alg type to
728 * NIX_SENDMEMALG_SET and also changing send mem addr field to
729 * next 8 bytes as it corrpt the actual tx tstamp registered
732 send_mem->w0.subdc = NIX_SUBDC_MEM;
733 send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
735 (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
/* Build the scatter-gather list for a multi-segment mbuf chain directly
 * in cmd[]/LMT memory: packs up to 3 segment sizes per SG sub-descriptor
 * (16-bit size slots in sg_u), sets the invert-DF bit per segment when
 * fast-free is disabled, and returns the descriptor size in dwords
 * (rounded up to a multiple of 2, +1 slot pair when tstamp is on).
 * NOTE(review): loop header, 'off' initialization and several closing
 * braces are on elided lines in this listing.
 */
739 static __rte_always_inline uint16_t
740 cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
742 struct nix_send_hdr_s *send_hdr;
743 union nix_send_sg_s *sg;
744 struct rte_mbuf *m_next;
745 uint64_t *slist, sg_u;
750 send_hdr = (struct nix_send_hdr_s *)cmd;
752 if (flags & NIX_TX_NEED_EXT_HDR)
757 sg = (union nix_send_sg_s *)&cmd[2 + off];
759 /* Start from second segment, first segment is already there */
762 nb_segs = m->nb_segs - 1;
764 slist = &cmd[3 + off + 1];
766 /* Set invert df if buffer is not to be freed by H/W */
767 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
768 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
770 /* Mark mempool object as "put" since it is freed by NIX */
771 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
772 if (!(sg_u & (1ULL << 55)))
773 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
780 /* Fill mbuf segments */
783 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
784 *slist = rte_mbuf_data_iova(m);
785 /* Set invert df if buffer is not to be freed by H/W */
786 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
787 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
788 /* Mark mempool object as "put" since it is freed by NIX
790 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
791 if (!(sg_u & (1ULL << (i + 55))))
792 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
797 if (i > 2 && nb_segs) {
799 /* Next SG subdesc */
800 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
803 sg = (union nix_send_sg_s *)slist;
813 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
814 /* Roundup extra dwords to multiple of 2 */
815 segdw = (segdw >> 1) + (segdw & 0x1);
817 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
818 send_hdr->w0.sizem1 = segdw - 1;
/* Scalar single-segment burst Tx: flow-control check, per-packet TSO
 * header fixup + descriptor prepare, optional inline-IPsec CPT
 * instruction build, then batched LMT STEOR submission in groups of up
 * to 32 (split into two STEORLs when burst > 16). With VWQE, flow
 * control is skipped and the SSO head is waited on before release.
 * NOTE(review): loop variables (laddr, sec, data, c_lmt_id, sa_base),
 * the outer left/burst loop bounds and shift values are on elided lines.
 */
823 static __rte_always_inline uint16_t
824 cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
825 uint64_t *cmd, uintptr_t base, const uint16_t flags)
827 struct cn10k_eth_txq *txq = tx_queue;
828 const rte_iova_t io_addr = txq->io_addr;
829 uint8_t lnum, c_lnum, c_shft, c_loff;
830 uintptr_t pa, lbase = txq->lmt_base;
831 uint16_t lmt_id, burst, left, i;
832 uintptr_t c_lbase = lbase;
833 rte_iova_t c_io_addr;
834 uint64_t lso_tun_fmt;
841 if (!(flags & NIX_TX_VWQE_F)) {
842 NIX_XMIT_FC_OR_RETURN(txq, pkts);
843 /* Reduce the cached count */
844 txq->fc_cache_pkts -= pkts;
847 /* Get cmd skeleton */
848 cn10k_nix_tx_skeleton(txq, cmd, flags);
850 if (flags & NIX_TX_OFFLOAD_TSO_F)
851 lso_tun_fmt = txq->lso_tun_fmt;
853 /* Get LMT base address and LMT ID as lcore id */
854 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
855 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
856 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
857 c_io_addr = txq->cpt_io_addr;
858 sa_base = txq->sa_base;
863 burst = left > 32 ? 32 : left;
866 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
872 for (i = 0; i < burst; i++) {
873 /* Perform header writes for TSO, barrier at
874 * lmt steorl will suffice.
876 if (flags & NIX_TX_OFFLOAD_TSO_F)
877 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
879 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
882 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
884 /* Prepare CPT instruction and get nixtx addr */
885 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
886 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
887 &c_lnum, &c_loff, &c_shft, sa_base,
890 /* Move NIX desc to LMT/NIXTX area */
891 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
892 cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
893 tx_pkts[i]->ol_flags, 4, flags);
894 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
898 if (flags & NIX_TX_VWQE_F)
899 roc_sso_hws_head_wait(base);
904 /* Submit CPT instructions if any */
905 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
906 /* Reduce pkts to be sent to CPT */
907 burst -= ((c_lnum << 1) + c_loff);
908 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
/* Large burst (>16): two STEORL submissions — 16 lines then remainder. */
914 data = cn10k_nix_tx_steor_data(flags);
915 pa = io_addr | (data & 0x7) << 4;
917 data |= (15ULL << 12);
918 data |= (uint64_t)lmt_id;
921 roc_lmt_submit_steorl(data, pa);
923 data = cn10k_nix_tx_steor_data(flags);
924 pa = io_addr | (data & 0x7) << 4;
926 data |= ((uint64_t)(burst - 17)) << 12;
927 data |= (uint64_t)(lmt_id + 16);
930 roc_lmt_submit_steorl(data, pa);
/* Small burst: single STEORL submission. */
932 data = cn10k_nix_tx_steor_data(flags);
933 pa = io_addr | (data & 0x7) << 4;
935 data |= ((uint64_t)(burst - 1)) << 12;
939 roc_lmt_submit_steorl(data, pa);
/* Multi-segment variant of cn10k_nix_xmit_pkts: additionally builds the
 * SG list on the LMT line per packet and accumulates per-line segment
 * dword counts in a 128-bit field (data128/shft) that is later split
 * into the two STEOR data words. Same CPT/security and dual-STEORL
 * submission structure as the single-seg path.
 * NOTE(review): laddr/sec/data128/c_lmt_id/sa_base declarations, loop
 * bounds and some shifts are on elided lines.
 */
949 static __rte_always_inline uint16_t
950 cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
951 uint16_t pkts, uint64_t *cmd, uintptr_t base,
952 const uint16_t flags)
954 struct cn10k_eth_txq *txq = tx_queue;
955 uintptr_t pa0, pa1, lbase = txq->lmt_base;
956 const rte_iova_t io_addr = txq->io_addr;
957 uint16_t segdw, lmt_id, burst, left, i;
958 uint8_t lnum, c_lnum, c_loff;
959 uintptr_t c_lbase = lbase;
960 uint64_t data0, data1;
961 rte_iova_t c_io_addr;
962 uint64_t lso_tun_fmt;
963 uint8_t shft, c_shft;
970 NIX_XMIT_FC_OR_RETURN(txq, pkts);
972 cn10k_nix_tx_skeleton(txq, cmd, flags);
974 /* Reduce the cached count */
975 txq->fc_cache_pkts -= pkts;
977 if (flags & NIX_TX_OFFLOAD_TSO_F)
978 lso_tun_fmt = txq->lso_tun_fmt;
980 /* Get LMT base address and LMT ID as lcore id */
981 ROC_LMT_BASE_ID_GET(lbase, lmt_id);
982 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
983 ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
984 c_io_addr = txq->cpt_io_addr;
985 sa_base = txq->sa_base;
990 burst = left > 32 ? 32 : left;
995 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1001 for (i = 0; i < burst; i++) {
1002 /* Perform header writes for TSO, barrier at
1003 * lmt steorl will suffice.
1005 if (flags & NIX_TX_OFFLOAD_TSO_F)
1006 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1008 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
1011 laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
1013 /* Prepare CPT instruction and get nixtx addr */
1014 if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
1015 cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
1016 &c_lnum, &c_loff, &c_shft, sa_base,
1019 /* Move NIX desc to LMT/NIXTX area */
1020 cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
1022 /* Store sg list directly on lmt line */
1023 segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
1025 cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
1026 tx_pkts[i]->ol_flags, segdw,
1028 if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
1030 data128 |= (((__uint128_t)(segdw - 1)) << shft);
1035 if (flags & NIX_TX_VWQE_F)
1036 roc_sso_hws_head_wait(base);
1041 /* Submit CPT instructions if any */
1042 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1043 /* Reduce pkts to be sent to CPT */
1044 burst -= ((c_lnum << 1) + c_loff);
1045 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
1049 data0 = (uint64_t)data128;
1050 data1 = (uint64_t)(data128 >> 64);
1051 /* Make data0 similar to data1 */
/* Large burst (>16): two STEORL submissions with data0/data1. */
1055 pa0 = io_addr | (data0 & 0x7) << 4;
1057 /* Move lmtst1..15 sz to bits 63:19 */
1059 data0 |= (15ULL << 12);
1060 data0 |= (uint64_t)lmt_id;
1063 roc_lmt_submit_steorl(data0, pa0);
1065 pa1 = io_addr | (data1 & 0x7) << 4;
1068 data1 |= ((uint64_t)(burst - 17)) << 12;
1069 data1 |= (uint64_t)(lmt_id + 16);
1072 roc_lmt_submit_steorl(data1, pa1);
/* Small burst: single STEORL submission. */
1074 pa0 = io_addr | (data0 & 0x7) << 4;
1076 /* Move lmtst1..15 sz to bits 63:19 */
1078 data0 |= ((burst - 1) << 12);
1079 data0 |= (uint64_t)lmt_id;
1082 roc_lmt_submit_steorl(data0, pa0);
1092 #if defined(RTE_ARCH_ARM64)
/* Vector-path LSO setup: fill the ext-header w0 LSO fields and fix the
 * w1 L4 checksum types for TSO packets, including the tunnel lso_format
 * selection from lso_tun_fmt. Mirrors the LSO branch inside
 * cn10k_nix_xmit_prepare but operates on the caller's w0/w1 unions.
 * NOTE(review): lso_sb/mask declarations and w0->lso enable are on
 * elided lines.
 */
1094 static __rte_always_inline void
1095 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
1096 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
1097 const uint64_t flags, const uint64_t lso_tun_fmt)
1102 if (!(ol_flags & PKT_TX_TCP_SEG))
1105 mask = -(!w1->il3type);
1106 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
1109 w0->lso_sb = lso_sb;
1110 w0->lso_mps = m->tso_segsz;
1111 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
1112 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
1114 /* Handle tunnel tso */
1115 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
1116 (ol_flags & PKT_TX_TUNNEL_MASK)) {
1117 const uint8_t is_udp_tun =
1118 (CNXK_NIX_UDP_TUN_BITMASK >>
1119 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
1121 uint8_t shift = is_udp_tun ? 32 : 0;
1123 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
1124 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
1126 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
1127 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
1128 /* Update format for UDP tunneled packet */
1130 w0->lso_format = (lso_tun_fmt >> shift);
/* Vector-path multi-segment SG list builder: same packing scheme as
 * cn10k_nix_prepare_mseg (3 sizes per SG sub-descriptor, invert-DF for
 * no-fast-free) but writes into caller-provided sh/sg unions and a cmd
 * slist. NOTE(review): slist init, loop header, m_next advance and the
 * final sizem1 computation are on elided lines.
 */
1134 static __rte_always_inline void
1135 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
1136 union nix_send_hdr_w0_u *sh,
1137 union nix_send_sg_s *sg, const uint32_t flags)
1139 struct rte_mbuf *m_next;
1140 uint64_t *slist, sg_u;
1144 sh->total = m->pkt_len;
1145 /* Clear sg->u header before use */
1146 sg->u &= 0xFC00000000000000;
1150 sg_u = sg_u | ((uint64_t)m->data_len);
1152 nb_segs = m->nb_segs - 1;
1155 /* Set invert df if buffer is not to be freed by H/W */
1156 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1157 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
1158 /* Mark mempool object as "put" since it is freed by NIX */
1159 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1160 if (!(sg_u & (1ULL << 55)))
1161 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
1166 /* Fill mbuf segments */
1169 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
1170 *slist = rte_mbuf_data_iova(m);
1171 /* Set invert df if buffer is not to be freed by H/W */
1172 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1173 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
1174 /* Mark mempool object as "put" since it is freed by NIX
1176 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1177 if (!(sg_u & (1ULL << (i + 55))))
1178 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
1184 if (i > 2 && nb_segs) {
1186 /* Next SG subdesc */
1187 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
1190 sg = (union nix_send_sg_s *)slist;
/* Vector-path per-packet SG preparation: for a single-segment mbuf only
 * the invert-DF bit (no-fast-free) is patched into cmd1 lane 0; for
 * chains, unpack sh/sg from the NEON lanes, build the full list via
 * cn10k_nix_prepare_mseg_vec_list, set sizem1 and pack the lanes back.
 */
1201 static __rte_always_inline void
1202 cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
1203 uint64x2_t *cmd1, const uint8_t segdw,
1204 const uint32_t flags)
1206 union nix_send_hdr_w0_u sh;
1207 union nix_send_sg_s sg;
1209 if (m->nb_segs == 1) {
1210 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1211 sg.u = vgetq_lane_u64(cmd1[0], 0);
1212 sg.u |= (cnxk_nix_prefree_seg(m) << 55);
1213 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1216 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1217 sg.u = vgetq_lane_u64(cmd1[0], 0);
1218 if (!(sg.u & (1ULL << 55)))
1219 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
1225 sh.u = vgetq_lane_u64(cmd0[0], 0);
1226 sg.u = vgetq_lane_u64(cmd1[0], 0);
1228 cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
1230 sh.sizem1 = segdw - 1;
1231 cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
1232 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1235 #define NIX_DESCS_PER_LOOP 4
/* Pack 4 prepared vector descriptors (cmd0..cmd3 lanes) into LMT lines
 * for the multi-seg vector path, choosing a layout per flag combination:
 * - no ext hdr / no tstamp and total segdw <= 8: all 4 packets in one
 *   line (2 dwords each);
 * - otherwise pair packets per line when their segdw sum fits in 8,
 *   with per-pair layouts for tstamp (4 dw each) and ext hdr (3 dw +
 *   room for segments);
 * - odd leftover packet gets its own line.
 * Accumulates per-line size-1 values into data128 at *shift, returns
 * lines used. NOTE(review): lmt_used updates, loop increments, 'off'
 * computation and several else branches are on elided lines.
 */
1237 static __rte_always_inline uint8_t
1238 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1239 uint64x2_t *cmd1, uint64x2_t *cmd2,
1240 uint64x2_t *cmd3, uint8_t *segdw,
1241 uint64_t *lmt_addr, __uint128_t *data128,
1242 uint8_t *shift, const uint16_t flags)
1244 uint8_t j, off, lmt_used;
1246 if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1247 !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1248 /* No segments in 4 consecutive packets. */
1249 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1250 for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1251 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1254 vst1q_u64(lmt_addr, cmd0[0]);
1255 vst1q_u64(lmt_addr + 2, cmd1[0]);
1256 vst1q_u64(lmt_addr + 4, cmd0[1]);
1257 vst1q_u64(lmt_addr + 6, cmd1[1]);
1258 vst1q_u64(lmt_addr + 8, cmd0[2]);
1259 vst1q_u64(lmt_addr + 10, cmd1[2]);
1260 vst1q_u64(lmt_addr + 12, cmd0[3]);
1261 vst1q_u64(lmt_addr + 14, cmd1[3]);
1263 *data128 |= ((__uint128_t)7) << *shift;
1271 for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1272 /* Fit consecutive packets in same LMTLINE. */
1273 if ((segdw[j] + segdw[j + 1]) <= 8) {
1274 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1275 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1278 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1281 segdw[j + 1], flags);
1282 /* TSTAMP takes 4 each, no segs. */
1283 vst1q_u64(lmt_addr, cmd0[j]);
1284 vst1q_u64(lmt_addr + 2, cmd2[j]);
1285 vst1q_u64(lmt_addr + 4, cmd1[j]);
1286 vst1q_u64(lmt_addr + 6, cmd3[j]);
1288 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1289 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1290 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1291 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1292 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1293 /* EXT header take 3 each, space for 2 segs.*/
1294 cn10k_nix_prepare_mseg_vec(mbufs[j],
1298 vst1q_u64(lmt_addr, cmd0[j]);
1299 vst1q_u64(lmt_addr + 2, cmd2[j]);
1300 vst1q_u64(lmt_addr + 4, cmd1[j]);
1303 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1304 lmt_addr + 12 + off,
1307 segdw[j + 1], flags);
1308 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1309 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1310 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1312 cn10k_nix_prepare_mseg_vec(mbufs[j],
1316 vst1q_u64(lmt_addr, cmd0[j]);
1317 vst1q_u64(lmt_addr + 2, cmd1[j]);
1320 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1324 segdw[j + 1], flags);
1325 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1326 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1328 *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
/* Pair did not fit: emit the single packet on its own line. */
1333 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1334 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1335 cn10k_nix_prepare_mseg_vec(mbufs[j],
1339 vst1q_u64(lmt_addr, cmd0[j]);
1340 vst1q_u64(lmt_addr + 2, cmd2[j]);
1341 vst1q_u64(lmt_addr + 4, cmd1[j]);
1344 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1345 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1346 cn10k_nix_prepare_mseg_vec(mbufs[j],
1350 vst1q_u64(lmt_addr, cmd0[j]);
1351 vst1q_u64(lmt_addr + 2, cmd2[j]);
1352 vst1q_u64(lmt_addr + 4, cmd1[j]);
1354 cn10k_nix_prepare_mseg_vec(mbufs[j],
1358 vst1q_u64(lmt_addr, cmd0[j]);
1359 vst1q_u64(lmt_addr + 2, cmd1[j]);
1361 *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
/*
 * Advance the LMT write cursor for a descriptor of @dw dwords (16B units).
 * If the current 128-byte LMTLINE cannot hold @dw more dwords, fold the
 * finished line's consumed dword count ((*loff >> 4) - 1) into @data128 at
 * bit position @shift (3 bits encode one line's size) and advance @shift
 * for the next line.  @next returns the address at which the caller must
 * store the descriptor; @loff is then advanced past the reserved space.
 * NOTE(review): the lines that step *lnum to the next LMTLINE and reset
 * *loff are elided in this excerpt -- confirm against the full source.
 */
1372 static __rte_always_inline void
1373 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1374 uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1376 /* Go to next line if we are out of space */
1377 if ((*loff + (dw << 4)) > 128) {
1378 *data128 = *data128 |
1379 (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1380 *shift = *shift + 3;
/* Hand the caller the slot for this descriptor, then consume it. */
1385 *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1386 *loff = *loff + (dw << 4);
/*
 * Store one packet's fully prepared send descriptor words to the LMT
 * area at @laddr.  Word layout written depends on compile-time @flags:
 *   cmd0 = SEND_HDR, cmd1 = SG, cmd2 = SEND_EXT, cmd3 = SEND_MEM (tstamp).
 * For multi-seg packets, cn10k_nix_prepare_mseg_vec() emits the SG list
 * after the header words and may update cmd0/cmd1 in place.
 * NOTE(review): several original lines (closing braces, the 'off'
 * computation used at offsets "48 + off") are elided in this excerpt;
 * comments below describe only the visible code.
 */
1389 static __rte_always_inline void
1390 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1391 uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1392 uint64x2_t cmd3, const uint16_t flags)
1396 /* Handle no fast free when security is enabled without mseg */
1397 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1398 (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1399 !(flags & NIX_TX_MULTI_SEG_F)) {
1400 union nix_send_sg_s sg;
/* Set SG_S[55] (don't-free) from prefree decision on the first seg. */
1402 sg.u = vgetq_lane_u64(cmd1, 0);
1403 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1404 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1406 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
/* If HW will free the mbuf, mark the mempool cookie as "put". */
1407 sg.u = vgetq_lane_u64(cmd1, 0);
1408 if (!(sg.u & (1ULL << 55)))
1409 __mempool_check_cookies(mbuf->pool, (void **)&mbuf, 1,
/* Multi-seg: write header words, SG list is laid down by prepare_mseg. */
1414 if (flags & NIX_TX_MULTI_SEG_F) {
1415 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1416 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1417 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1418 &cmd0, &cmd1, segdw, flags);
1419 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1420 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1421 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
/* SEND_MEM goes after the SG list; 'off' computed in elided lines. */
1424 vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1425 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1426 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1427 &cmd0, &cmd1, segdw, flags);
1428 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1429 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1430 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
/* No EXT header: SG list starts right after HDR + SG words. */
1432 cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1433 &cmd0, &cmd1, segdw, flags);
1434 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1435 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
/* Single-seg with EXT header: fixed 3- or 4-word layout. */
1437 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1438 /* Store the prepared send desc to LMT lines */
1439 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1440 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1441 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1442 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1443 vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1445 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1446 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1447 vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1450 /* Store the prepared send desc to LMT lines */
1451 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1452 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1456 static __rte_always_inline uint16_t
1457 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1458 uint16_t pkts, uint64_t *cmd, uintptr_t base,
1459 const uint16_t flags)
1461 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1462 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1463 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1464 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1465 uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1466 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1467 uint64x2_t senddesc01_w0, senddesc23_w0;
1468 uint64x2_t senddesc01_w1, senddesc23_w1;
1469 uint64x2_t sendext01_w0, sendext23_w0;
1470 uint64x2_t sendext01_w1, sendext23_w1;
1471 uint64x2_t sendmem01_w0, sendmem23_w0;
1472 uint64x2_t sendmem01_w1, sendmem23_w1;
1473 uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1474 uint64x2_t sgdesc01_w0, sgdesc23_w0;
1475 uint64x2_t sgdesc01_w1, sgdesc23_w1;
1476 struct cn10k_eth_txq *txq = tx_queue;
1477 rte_iova_t io_addr = txq->io_addr;
1478 uintptr_t laddr = txq->lmt_base;
1479 uint8_t c_lnum, c_shft, c_loff;
1480 uint64x2_t ltypes01, ltypes23;
1481 uint64x2_t xtmp128, ytmp128;
1482 uint64x2_t xmask01, xmask23;
1483 uintptr_t c_laddr = laddr;
1484 uint8_t lnum, shift, loff;
1485 rte_iova_t c_io_addr;
1488 __uint128_t data128;
1492 if (!(flags & NIX_TX_VWQE_F)) {
1493 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1494 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1495 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1496 /* Reduce the cached count */
1497 txq->fc_cache_pkts -= pkts;
1499 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1500 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1503 /* Perform header writes before barrier for TSO */
1504 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1505 for (i = 0; i < pkts; i++)
1506 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1509 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1510 senddesc23_w0 = senddesc01_w0;
1511 senddesc01_w1 = vdupq_n_u64(0);
1512 senddesc23_w1 = senddesc01_w1;
1513 sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1514 sgdesc23_w0 = sgdesc01_w0;
1516 /* Load command defaults into vector variables. */
1517 if (flags & NIX_TX_NEED_EXT_HDR) {
1518 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1519 sendext23_w0 = sendext01_w0;
1520 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1521 sendext23_w1 = sendext01_w1;
1522 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1523 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1524 sendmem23_w0 = sendmem01_w0;
1525 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1526 sendmem23_w1 = sendmem01_w1;
1530 /* Get LMT base address and LMT ID as lcore id */
1531 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1532 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1533 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1534 c_io_addr = txq->cpt_io_addr;
1535 sa_base = txq->sa_base;
1540 /* Number of packets to prepare depends on offloads enabled. */
1541 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1542 cn10k_nix_pkts_per_vec_brst(flags) :
1544 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1549 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1556 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1557 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1562 if (flags & NIX_TX_MULTI_SEG_F) {
1565 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1566 struct rte_mbuf *m = tx_pkts[j];
1568 /* Get dwords based on nb_segs. */
1569 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1570 /* Add dwords based on offloads. */
1571 segdw[j] += 1 + /* SEND HDR */
1572 !!(flags & NIX_TX_NEED_EXT_HDR) +
1573 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1576 /* Check if there are enough LMTLINES for this loop */
1577 if (lnum + 4 > 32) {
1578 uint8_t ldwords_con = 0, lneeded = 0;
1579 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1580 ldwords_con += segdw[j];
1581 if (ldwords_con > 8) {
1583 ldwords_con = segdw[j];
1587 if (lnum + lneeded > 32) {
1593 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1595 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1596 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1598 senddesc23_w0 = senddesc01_w0;
1599 sgdesc23_w0 = sgdesc01_w0;
1601 /* Clear vlan enables. */
1602 if (flags & NIX_TX_NEED_EXT_HDR) {
1603 sendext01_w1 = vbicq_u64(sendext01_w1,
1604 vdupq_n_u64(0x3FFFF00FFFF00));
1605 sendext23_w1 = sendext01_w1;
1608 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1609 /* Reset send mem alg to SETTSTMP from SUB*/
1610 sendmem01_w0 = vbicq_u64(sendmem01_w0,
1611 vdupq_n_u64(BIT_ULL(59)));
1612 /* Reset send mem address to default. */
1614 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1615 sendmem23_w0 = sendmem01_w0;
1616 sendmem23_w1 = sendmem01_w1;
1619 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1620 /* Clear the LSO enable bit. */
1621 sendext01_w0 = vbicq_u64(sendext01_w0,
1622 vdupq_n_u64(BIT_ULL(14)));
1623 sendext23_w0 = sendext01_w0;
1626 /* Move mbufs to iova */
1627 mbuf0 = (uint64_t *)tx_pkts[0];
1628 mbuf1 = (uint64_t *)tx_pkts[1];
1629 mbuf2 = (uint64_t *)tx_pkts[2];
1630 mbuf3 = (uint64_t *)tx_pkts[3];
1632 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1633 offsetof(struct rte_mbuf, buf_iova));
1634 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1635 offsetof(struct rte_mbuf, buf_iova));
1636 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1637 offsetof(struct rte_mbuf, buf_iova));
1638 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1639 offsetof(struct rte_mbuf, buf_iova));
1641 * Get mbuf's, olflags, iova, pktlen, dataoff
1642 * dataoff_iovaX.D[0] = iova,
1643 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1644 * len_olflagsX.D[0] = ol_flags,
1645 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1647 dataoff_iova0 = vld1q_u64(mbuf0);
1648 len_olflags0 = vld1q_u64(mbuf0 + 2);
1649 dataoff_iova1 = vld1q_u64(mbuf1);
1650 len_olflags1 = vld1q_u64(mbuf1 + 2);
1651 dataoff_iova2 = vld1q_u64(mbuf2);
1652 len_olflags2 = vld1q_u64(mbuf2 + 2);
1653 dataoff_iova3 = vld1q_u64(mbuf3);
1654 len_olflags3 = vld1q_u64(mbuf3 + 2);
1656 /* Move mbufs to point pool */
1657 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1658 offsetof(struct rte_mbuf, pool) -
1659 offsetof(struct rte_mbuf, buf_iova));
1660 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1661 offsetof(struct rte_mbuf, pool) -
1662 offsetof(struct rte_mbuf, buf_iova));
1663 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1664 offsetof(struct rte_mbuf, pool) -
1665 offsetof(struct rte_mbuf, buf_iova));
1666 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1667 offsetof(struct rte_mbuf, pool) -
1668 offsetof(struct rte_mbuf, buf_iova));
1670 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1671 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1672 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1674 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1675 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1678 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1679 : [a] "+w"(senddesc01_w1)
1680 : [in] "r"(mbuf0 + 2)
1683 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1684 : [a] "+w"(senddesc01_w1)
1685 : [in] "r"(mbuf1 + 2)
1688 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1689 : [b] "+w"(senddesc23_w1)
1690 : [in] "r"(mbuf2 + 2)
1693 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1694 : [b] "+w"(senddesc23_w1)
1695 : [in] "r"(mbuf3 + 2)
1698 /* Get pool pointer alone */
1699 mbuf0 = (uint64_t *)*mbuf0;
1700 mbuf1 = (uint64_t *)*mbuf1;
1701 mbuf2 = (uint64_t *)*mbuf2;
1702 mbuf3 = (uint64_t *)*mbuf3;
1704 /* Get pool pointer alone */
1705 mbuf0 = (uint64_t *)*mbuf0;
1706 mbuf1 = (uint64_t *)*mbuf1;
1707 mbuf2 = (uint64_t *)*mbuf2;
1708 mbuf3 = (uint64_t *)*mbuf3;
1711 const uint8x16_t shuf_mask2 = {
1712 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1713 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1715 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1716 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1718 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1719 const uint64x2_t and_mask0 = {
1724 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1725 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1726 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1727 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1730 * Pick only 16 bits of pktlen preset at bits 63:32
1731 * and place them at bits 15:0.
1733 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1734 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1736 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1737 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1738 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1740 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1741 * pktlen at 15:0 position.
1743 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1744 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1745 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1746 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1748 /* Move mbuf to point to pool_id. */
1749 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1750 offsetof(struct rte_mempool, pool_id));
1751 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1752 offsetof(struct rte_mempool, pool_id));
1753 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1754 offsetof(struct rte_mempool, pool_id));
1755 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1756 offsetof(struct rte_mempool, pool_id));
1758 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1759 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1761 * Lookup table to translate ol_flags to
1762 * il3/il4 types. But we still use ol3/ol4 types in
1763 * senddesc_w1 as only one header processing is enabled.
1765 const uint8x16_t tbl = {
1766 /* [0-15] = il4type:il3type */
1767 0x04, /* none (IPv6 assumed) */
1768 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
1769 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
1770 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
1771 0x03, /* PKT_TX_IP_CKSUM */
1772 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
1773 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
1774 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
1775 0x02, /* PKT_TX_IPV4 */
1776 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
1777 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
1778 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
1779 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
1780 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1783 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1786 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1791 /* Extract olflags to translate to iltypes */
1792 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1793 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1796 * E(47):L3_LEN(9):L2_LEN(7+z)
1797 * E(47):L3_LEN(9):L2_LEN(7+z)
1799 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1800 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1802 /* Move OLFLAGS bits 55:52 to 51:48
1803 * with zeros preprended on the byte and rest
1806 xtmp128 = vshrq_n_u8(xtmp128, 4);
1807 ytmp128 = vshrq_n_u8(ytmp128, 4);
1809 * E(48):L3_LEN(8):L2_LEN(z+7)
1810 * E(48):L3_LEN(8):L2_LEN(z+7)
1812 const int8x16_t tshft3 = {
1813 -1, 0, 8, 8, 8, 8, 8, 8,
1814 -1, 0, 8, 8, 8, 8, 8, 8,
1817 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1818 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1821 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1822 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1824 /* Pick only relevant fields i.e Bit 48:55 of iltype
1825 * and place it in ol3/ol4type of senddesc_w1
1827 const uint8x16_t shuf_mask0 = {
1828 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1829 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1832 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1833 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1835 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1836 * a [E(32):E(16):OL3(8):OL2(8)]
1838 * a [E(32):E(16):(OL3+OL2):OL2]
1839 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1841 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1842 vshlq_n_u16(senddesc01_w1, 8));
1843 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1844 vshlq_n_u16(senddesc23_w1, 8));
1846 /* Move ltypes to senddesc*_w1 */
1847 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1848 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1849 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1850 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1852 * Lookup table to translate ol_flags to
1856 const uint8x16_t tbl = {
1857 /* [0-15] = ol4type:ol3type */
1859 0x03, /* OUTER_IP_CKSUM */
1860 0x02, /* OUTER_IPV4 */
1861 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1862 0x04, /* OUTER_IPV6 */
1863 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1864 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1865 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1868 0x00, /* OUTER_UDP_CKSUM */
1869 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1870 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1871 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1874 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1875 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1878 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1881 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1882 * OUTER_IPV4 | OUTER_IP_CKSUM
1886 /* Extract olflags to translate to iltypes */
1887 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1888 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1891 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1892 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1894 const uint8x16_t shuf_mask5 = {
1895 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1896 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1898 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1899 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1901 /* Extract outer ol flags only */
1902 const uint64x2_t o_cksum_mask = {
1907 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1908 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1910 /* Extract OUTER_UDP_CKSUM bit 41 and
1914 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1915 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1917 /* Shift oltype by 2 to start nibble from BIT(56)
1918 * instead of BIT(58)
1920 xtmp128 = vshrq_n_u8(xtmp128, 2);
1921 ytmp128 = vshrq_n_u8(ytmp128, 2);
1923 * E(48):L3_LEN(8):L2_LEN(z+7)
1924 * E(48):L3_LEN(8):L2_LEN(z+7)
1926 const int8x16_t tshft3 = {
1927 -1, 0, 8, 8, 8, 8, 8, 8,
1928 -1, 0, 8, 8, 8, 8, 8, 8,
1931 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1932 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1935 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1936 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1938 /* Pick only relevant fields i.e Bit 56:63 of oltype
1939 * and place it in ol3/ol4type of senddesc_w1
1941 const uint8x16_t shuf_mask0 = {
1942 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1943 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1946 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1947 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1949 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1950 * a [E(32):E(16):OL3(8):OL2(8)]
1952 * a [E(32):E(16):(OL3+OL2):OL2]
1953 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1955 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1956 vshlq_n_u16(senddesc01_w1, 8));
1957 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1958 vshlq_n_u16(senddesc23_w1, 8));
1960 /* Move ltypes to senddesc*_w1 */
1961 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1962 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1963 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1964 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1965 /* Lookup table to translate ol_flags to
1966 * ol4type, ol3type, il4type, il3type of senddesc_w1
1968 const uint8x16x2_t tbl = {{
1970 /* [0-15] = il4type:il3type */
1971 0x04, /* none (IPv6) */
1972 0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1973 0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1974 0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1975 0x03, /* PKT_TX_IP_CKSUM */
1976 0x13, /* PKT_TX_IP_CKSUM |
1979 0x23, /* PKT_TX_IP_CKSUM |
1982 0x33, /* PKT_TX_IP_CKSUM |
1985 0x02, /* PKT_TX_IPV4 */
1986 0x12, /* PKT_TX_IPV4 |
1989 0x22, /* PKT_TX_IPV4 |
1992 0x32, /* PKT_TX_IPV4 |
1995 0x03, /* PKT_TX_IPV4 |
1998 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2001 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2004 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2010 /* [16-31] = ol4type:ol3type */
2012 0x03, /* OUTER_IP_CKSUM */
2013 0x02, /* OUTER_IPV4 */
2014 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2015 0x04, /* OUTER_IPV6 */
2016 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2017 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2018 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2021 0x00, /* OUTER_UDP_CKSUM */
2022 0x33, /* OUTER_UDP_CKSUM |
2025 0x32, /* OUTER_UDP_CKSUM |
2028 0x33, /* OUTER_UDP_CKSUM |
2029 * OUTER_IPV4 | OUTER_IP_CKSUM
2031 0x34, /* OUTER_UDP_CKSUM |
2034 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2037 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2040 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2041 * OUTER_IPV4 | OUTER_IP_CKSUM
2046 /* Extract olflags to translate to oltype & iltype */
2047 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2048 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2051 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2052 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2054 const uint32x4_t tshft_4 = {
2060 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2061 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2064 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2065 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2067 const uint8x16_t shuf_mask5 = {
2068 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2069 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2071 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2072 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2074 /* Extract outer and inner header ol_flags */
2075 const uint64x2_t oi_cksum_mask = {
2080 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2081 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2083 /* Extract OUTER_UDP_CKSUM bit 41 and
2087 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2088 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2090 /* Shift right oltype by 2 and iltype by 4
2091 * to start oltype nibble from BIT(58)
2092 * instead of BIT(56) and iltype nibble from BIT(48)
2093 * instead of BIT(52).
2095 const int8x16_t tshft5 = {
2096 8, 8, 8, 8, 8, 8, -4, -2,
2097 8, 8, 8, 8, 8, 8, -4, -2,
2100 xtmp128 = vshlq_u8(xtmp128, tshft5);
2101 ytmp128 = vshlq_u8(ytmp128, tshft5);
2103 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2104 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2106 const int8x16_t tshft3 = {
2107 -1, 0, -1, 0, 0, 0, 0, 0,
2108 -1, 0, -1, 0, 0, 0, 0, 0,
2111 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2112 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2114 /* Mark Bit(4) of oltype */
2115 const uint64x2_t oi_cksum_mask2 = {
2120 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2121 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2124 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2125 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2127 /* Pick only relevant fields i.e Bit 48:55 of iltype and
2128 * Bit 56:63 of oltype and place it in corresponding
2129 * place in senddesc_w1.
2131 const uint8x16_t shuf_mask0 = {
2132 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2133 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2136 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2137 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2139 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2140 * l3len, l2len, ol3len, ol2len.
2141 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2143 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2145 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2146 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2148 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2149 vshlq_n_u32(senddesc01_w1, 8));
2150 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2151 vshlq_n_u32(senddesc23_w1, 8));
2153 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2154 senddesc01_w1 = vaddq_u8(
2155 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2156 senddesc23_w1 = vaddq_u8(
2157 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2159 /* Move ltypes to senddesc*_w1 */
2160 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2161 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2164 xmask01 = vdupq_n_u64(0);
2166 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2171 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2176 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2181 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2185 xmask01 = vshlq_n_u64(xmask01, 20);
2186 xmask23 = vshlq_n_u64(xmask23, 20);
2188 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2189 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2191 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2192 /* Tx ol_flag for vlan. */
2193 const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
2194 /* Bit enable for VLAN1 */
2195 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2196 /* Tx ol_flag for QnQ. */
2197 const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
2198 /* Bit enable for VLAN0 */
2199 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2200 /* Load vlan values from packet. outer is VLAN 0 */
2201 uint64x2_t ext01 = {
2202 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2203 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2204 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2205 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2207 uint64x2_t ext23 = {
2208 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2209 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2210 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2211 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2214 /* Get ol_flags of the packets. */
2215 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2216 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2218 /* ORR vlan outer/inner values into cmd. */
2219 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2220 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2222 /* Test for offload enable bits and generate masks. */
2223 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2225 vandq_u64(vtstq_u64(xtmp128, olq),
2227 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2229 vandq_u64(vtstq_u64(ytmp128, olq),
2232 /* Set vlan enable bits into cmd based on mask. */
2233 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2234 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2237 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2238 /* Tx ol_flag for timestam. */
2239 const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
2240 PKT_TX_IEEE1588_TMST};
2241 /* Set send mem alg to SUB. */
2242 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2243 /* Increment send mem address by 8. */
2244 const uint64x2_t addr = {0x8, 0x8};
2246 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2247 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2249 /* Check if timestamp is requested and generate inverted
2250 * mask as we need not make any changes to default cmd
2253 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2254 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2256 /* Change send mem address to an 8 byte offset when
2257 * TSTMP is disabled.
2259 sendmem01_w1 = vaddq_u64(sendmem01_w1,
2260 vandq_u64(xtmp128, addr));
2261 sendmem23_w1 = vaddq_u64(sendmem23_w1,
2262 vandq_u64(ytmp128, addr));
2263 /* Change send mem alg to SUB when TSTMP is disabled. */
2264 sendmem01_w0 = vorrq_u64(sendmem01_w0,
2265 vandq_u64(xtmp128, alg));
2266 sendmem23_w0 = vorrq_u64(sendmem23_w0,
2267 vandq_u64(ytmp128, alg));
2269 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2270 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2271 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2272 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2275 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2276 const uint64_t lso_fmt = txq->lso_tun_fmt;
2277 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2278 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2280 /* Extract SD W1 as we need to set L4 types. */
2281 vst1q_u64(sd_w1, senddesc01_w1);
2282 vst1q_u64(sd_w1 + 2, senddesc23_w1);
2284 /* Extract SX W0 as we need to set LSO fields. */
2285 vst1q_u64(sx_w0, sendext01_w0);
2286 vst1q_u64(sx_w0 + 2, sendext23_w0);
2288 /* Extract ol_flags. */
2289 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2290 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2292 /* Prepare individual mbufs. */
2293 cn10k_nix_prepare_tso(tx_pkts[0],
2294 (union nix_send_hdr_w1_u *)&sd_w1[0],
2295 (union nix_send_ext_w0_u *)&sx_w0[0],
2296 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2298 cn10k_nix_prepare_tso(tx_pkts[1],
2299 (union nix_send_hdr_w1_u *)&sd_w1[1],
2300 (union nix_send_ext_w0_u *)&sx_w0[1],
2301 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2303 cn10k_nix_prepare_tso(tx_pkts[2],
2304 (union nix_send_hdr_w1_u *)&sd_w1[2],
2305 (union nix_send_ext_w0_u *)&sx_w0[2],
2306 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2308 cn10k_nix_prepare_tso(tx_pkts[3],
2309 (union nix_send_hdr_w1_u *)&sd_w1[3],
2310 (union nix_send_ext_w0_u *)&sx_w0[3],
2311 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2313 senddesc01_w1 = vld1q_u64(sd_w1);
2314 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2316 sendext01_w0 = vld1q_u64(sx_w0);
2317 sendext23_w0 = vld1q_u64(sx_w0 + 2);
2320 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2321 !(flags & NIX_TX_MULTI_SEG_F) &&
2322 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2323 /* Set don't free bit if reference count > 1 */
2324 xmask01 = vdupq_n_u64(0);
2327 /* Move mbufs to iova */
2328 mbuf0 = (uint64_t *)tx_pkts[0];
2329 mbuf1 = (uint64_t *)tx_pkts[1];
2330 mbuf2 = (uint64_t *)tx_pkts[2];
2331 mbuf3 = (uint64_t *)tx_pkts[3];
2333 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2334 vsetq_lane_u64(0x80000, xmask01, 0);
2336 __mempool_check_cookies(
2337 ((struct rte_mbuf *)mbuf0)->pool,
2338 (void **)&mbuf0, 1, 0);
2340 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2341 vsetq_lane_u64(0x80000, xmask01, 1);
2343 __mempool_check_cookies(
2344 ((struct rte_mbuf *)mbuf1)->pool,
2345 (void **)&mbuf1, 1, 0);
2347 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2348 vsetq_lane_u64(0x80000, xmask23, 0);
2350 __mempool_check_cookies(
2351 ((struct rte_mbuf *)mbuf2)->pool,
2352 (void **)&mbuf2, 1, 0);
2354 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2355 vsetq_lane_u64(0x80000, xmask23, 1);
2357 __mempool_check_cookies(
2358 ((struct rte_mbuf *)mbuf3)->pool,
2359 (void **)&mbuf3, 1, 0);
2360 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2361 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2362 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2363 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2364 /* Move mbufs to iova */
2365 mbuf0 = (uint64_t *)tx_pkts[0];
2366 mbuf1 = (uint64_t *)tx_pkts[1];
2367 mbuf2 = (uint64_t *)tx_pkts[2];
2368 mbuf3 = (uint64_t *)tx_pkts[3];
2370 /* Mark mempool object as "put" since
2371 * it is freed by NIX
2373 __mempool_check_cookies(
2374 ((struct rte_mbuf *)mbuf0)->pool,
2375 (void **)&mbuf0, 1, 0);
2377 __mempool_check_cookies(
2378 ((struct rte_mbuf *)mbuf1)->pool,
2379 (void **)&mbuf1, 1, 0);
2381 __mempool_check_cookies(
2382 ((struct rte_mbuf *)mbuf2)->pool,
2383 (void **)&mbuf2, 1, 0);
2385 __mempool_check_cookies(
2386 ((struct rte_mbuf *)mbuf3)->pool,
2387 (void **)&mbuf3, 1, 0);
2390 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
2391 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2392 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2393 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2394 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2396 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2397 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2398 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2399 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2401 if (flags & NIX_TX_NEED_EXT_HDR) {
2402 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2403 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2404 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2405 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2408 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2409 const uint64x2_t olf = {PKT_TX_SEC_OFFLOAD,
2410 PKT_TX_SEC_OFFLOAD};
2414 /* Extract ol_flags. */
2415 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2416 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2418 xtmp128 = vtstq_u64(olf, xtmp128);
2419 ytmp128 = vtstq_u64(olf, ytmp128);
2422 dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2423 if (vgetq_lane_u64(xtmp128, 0))
2424 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2425 &cmd1[0], &next, c_laddr,
2427 &c_shft, sa_base, flags);
2429 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2430 &shift, &wd.data128, &next);
2432 /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2433 cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2434 cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2438 dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2439 if (vgetq_lane_u64(xtmp128, 1))
2440 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2441 &cmd1[1], &next, c_laddr,
2443 &c_shft, sa_base, flags);
2445 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2446 &shift, &wd.data128, &next);
2448 /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2449 cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2450 cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2454 dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2455 if (vgetq_lane_u64(ytmp128, 0))
2456 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2457 &cmd1[2], &next, c_laddr,
2459 &c_shft, sa_base, flags);
2461 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2462 &shift, &wd.data128, &next);
2464 /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2465 cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2466 cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2470 dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2471 if (vgetq_lane_u64(ytmp128, 1))
2472 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2473 &cmd1[3], &next, c_laddr,
2475 &c_shft, sa_base, flags);
2477 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2478 &shift, &wd.data128, &next);
2480 /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2481 cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2482 cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2485 } else if (flags & NIX_TX_MULTI_SEG_F) {
2489 j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2492 LMT_OFF(laddr, lnum,
2494 &wd.data128, &shift,
2497 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2498 /* Store the prepared send desc to LMT lines */
2499 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2500 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2501 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2502 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2503 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2504 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2505 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2506 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2507 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2509 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2510 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2511 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2512 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2513 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2514 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2515 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2516 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2518 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2519 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2520 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2521 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2522 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2523 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2525 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2526 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2527 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2528 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2529 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2530 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2534 /* Store the prepared send desc to LMT lines */
2535 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2536 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2537 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2538 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2539 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2540 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2541 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2542 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2546 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2549 /* Roundup lnum to last line if it is partial */
2550 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2551 lnum = lnum + !!loff;
2552 wd.data128 = wd.data128 |
2553 (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2556 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2559 if (flags & NIX_TX_VWQE_F)
2560 roc_sso_hws_head_wait(base);
2564 /* Submit CPT instructions if any */
2565 if (flags & NIX_TX_OFFLOAD_SECURITY_F)
2566 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2571 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2572 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2574 pa = io_addr | (wd.data[0] & 0x7) << 4;
2575 wd.data[0] &= ~0x7ULL;
2577 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2580 wd.data[0] |= (15ULL << 12);
2581 wd.data[0] |= (uint64_t)lmt_id;
2584 roc_lmt_submit_steorl(wd.data[0], pa);
2586 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2587 wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2589 pa = io_addr | (wd.data[1] & 0x7) << 4;
2590 wd.data[1] &= ~0x7ULL;
2592 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2595 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2596 wd.data[1] |= (uint64_t)(lmt_id + 16);
2599 roc_lmt_submit_steorl(wd.data[1], pa);
2601 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2602 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2604 pa = io_addr | (wd.data[0] & 0x7) << 4;
2605 wd.data[0] &= ~0x7ULL;
2607 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2610 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2611 wd.data[0] |= lmt_id;
2614 roc_lmt_submit_steorl(wd.data[0], pa);
2621 if (unlikely(scalar)) {
2622 if (flags & NIX_TX_MULTI_SEG_F)
2623 pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
2627 pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
/* Fallback stub used when the SIMD (NEON) vector Tx path is compiled
 * out: every argument is consumed via RTE_SET_USED so -Wunused does not
 * fire. NOTE(review): the remainder of this stub (the other
 * RTE_SET_USED calls and the return statement) lies outside this view —
 * presumably it returns 0 (no packets transmitted); confirm in the
 * full source.
 */
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(flags);
/* Short per-offload aliases for the NIX_TX_OFFLOAD_* flag bits.
 * They exist only to keep the NIX_TX_FASTPATH_MODES table compact;
 * each alias expands to exactly one offload flag defined at the top
 * of this file.
 */
#define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F NIX_TX_OFFLOAD_TSO_F
#define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F NIX_TX_OFFLOAD_SECURITY_F
/* Table columns per entry:
 * T(name, T_SEC_F, TSP, TSO, NOFF, VLAN, OL3OL4CSUM, L3L4CSUM, sz, flags)
 * where the seven 0/1 columns select the offload flag bits, `sz` is the
 * per-packet descriptor size argument, and `flags` is the OR of the
 * selected *_F aliases.
 */
2659 #define NIX_TX_FASTPATH_MODES \
2660 T(no_offload, 0, 0, 0, 0, 0, 0, 0, 4, \
2661 NIX_TX_OFFLOAD_NONE) \
2662 T(l3l4csum, 0, 0, 0, 0, 0, 0, 1, 4, \
2664 T(ol3ol4csum, 0, 0, 0, 0, 0, 1, 0, 4, \
2666 T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 0, 1, 1, 4, \
2667 OL3OL4CSUM_F | L3L4CSUM_F) \
2668 T(vlan, 0, 0, 0, 0, 1, 0, 0, 6, \
2670 T(vlan_l3l4csum, 0, 0, 0, 0, 1, 0, 1, 6, \
2671 VLAN_F | L3L4CSUM_F) \
2672 T(vlan_ol3ol4csum, 0, 0, 0, 0, 1, 1, 0, 6, \
2673 VLAN_F | OL3OL4CSUM_F) \
2674 T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 1, 6, \
2675 VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2676 T(noff, 0, 0, 0, 1, 0, 0, 0, 4, \
2678 T(noff_l3l4csum, 0, 0, 0, 1, 0, 0, 1, 4, \
2679 NOFF_F | L3L4CSUM_F) \
2680 T(noff_ol3ol4csum, 0, 0, 0, 1, 0, 1, 0, 4, \
2681 NOFF_F | OL3OL4CSUM_F) \
2682 T(noff_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 0, 1, 1, 4, \
2683 NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2684 T(noff_vlan, 0, 0, 0, 1, 1, 0, 0, 6, \
2686 T(noff_vlan_l3l4csum, 0, 0, 0, 1, 1, 0, 1, 6, \
2687 NOFF_F | VLAN_F | L3L4CSUM_F) \
2688 T(noff_vlan_ol3ol4csum, 0, 0, 0, 1, 1, 1, 0, 6, \
2689 NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2690 T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 1, 6, \
2691 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2692 T(tso, 0, 0, 1, 0, 0, 0, 0, 6, \
2694 T(tso_l3l4csum, 0, 0, 1, 0, 0, 0, 1, 6, \
2695 TSO_F | L3L4CSUM_F) \
2696 T(tso_ol3ol4csum, 0, 0, 1, 0, 0, 1, 0, 6, \
2697 TSO_F | OL3OL4CSUM_F) \
2698 T(tso_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 0, 1, 1, 6, \
2699 TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2700 T(tso_vlan, 0, 0, 1, 0, 1, 0, 0, 6, \
2702 T(tso_vlan_l3l4csum, 0, 0, 1, 0, 1, 0, 1, 6, \
2703 TSO_F | VLAN_F | L3L4CSUM_F) \
2704 T(tso_vlan_ol3ol4csum, 0, 0, 1, 0, 1, 1, 0, 6, \
2705 TSO_F | VLAN_F | OL3OL4CSUM_F) \
2706 T(tso_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 1, 6, \
2707 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2708 T(tso_noff, 0, 0, 1, 1, 0, 0, 0, 6, \
2710 T(tso_noff_l3l4csum, 0, 0, 1, 1, 0, 0, 1, 6, \
2711 TSO_F | NOFF_F | L3L4CSUM_F) \
2712 T(tso_noff_ol3ol4csum, 0, 0, 1, 1, 0, 1, 0, 6, \
2713 TSO_F | NOFF_F | OL3OL4CSUM_F) \
2714 T(tso_noff_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 0, 1, 1, 6, \
2715 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2716 T(tso_noff_vlan, 0, 0, 1, 1, 1, 0, 0, 6, \
2717 TSO_F | NOFF_F | VLAN_F) \
2718 T(tso_noff_vlan_l3l4csum, 0, 0, 1, 1, 1, 0, 1, 6, \
2719 TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2720 T(tso_noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 1, 0, 6, \
2721 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2722 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 1, 6, \
2723 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2724 T(ts, 0, 1, 0, 0, 0, 0, 0, 8, \
2726 T(ts_l3l4csum, 0, 1, 0, 0, 0, 0, 1, 8, \
2727 TSP_F | L3L4CSUM_F) \
2728 T(ts_ol3ol4csum, 0, 1, 0, 0, 0, 1, 0, 8, \
2729 TSP_F | OL3OL4CSUM_F) \
2730 T(ts_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 0, 1, 1, 8, \
2731 TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2732 T(ts_vlan, 0, 1, 0, 0, 1, 0, 0, 8, \
2734 T(ts_vlan_l3l4csum, 0, 1, 0, 0, 1, 0, 1, 8, \
2735 TSP_F | VLAN_F | L3L4CSUM_F) \
2736 T(ts_vlan_ol3ol4csum, 0, 1, 0, 0, 1, 1, 0, 8, \
2737 TSP_F | VLAN_F | OL3OL4CSUM_F) \
2738 T(ts_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 1, 8, \
2739 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2740 T(ts_noff, 0, 1, 0, 1, 0, 0, 0, 8, \
2742 T(ts_noff_l3l4csum, 0, 1, 0, 1, 0, 0, 1, 8, \
2743 TSP_F | NOFF_F | L3L4CSUM_F) \
2744 T(ts_noff_ol3ol4csum, 0, 1, 0, 1, 0, 1, 0, 8, \
2745 TSP_F | NOFF_F | OL3OL4CSUM_F) \
2746 T(ts_noff_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 0, 1, 1, 8, \
2747 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2748 T(ts_noff_vlan, 0, 1, 0, 1, 1, 0, 0, 8, \
2749 TSP_F | NOFF_F | VLAN_F) \
2750 T(ts_noff_vlan_l3l4csum, 0, 1, 0, 1, 1, 0, 1, 8, \
2751 TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2752 T(ts_noff_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 1, 0, 8, \
2753 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2754 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 1, 8, \
2755 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2756 T(ts_tso, 0, 1, 1, 0, 0, 0, 0, 8, \
2758 T(ts_tso_l3l4csum, 0, 1, 1, 0, 0, 0, 1, 8, \
2759 TSP_F | TSO_F | L3L4CSUM_F) \
2760 T(ts_tso_ol3ol4csum, 0, 1, 1, 0, 0, 1, 0, 8, \
2761 TSP_F | TSO_F | OL3OL4CSUM_F) \
2762 T(ts_tso_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 0, 1, 1, 8, \
2763 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2764 T(ts_tso_vlan, 0, 1, 1, 0, 1, 0, 0, 8, \
2765 TSP_F | TSO_F | VLAN_F) \
2766 T(ts_tso_vlan_l3l4csum, 0, 1, 1, 0, 1, 0, 1, 8, \
2767 TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2768 T(ts_tso_vlan_ol3ol4csum, 0, 1, 1, 0, 1, 1, 0, 8, \
2769 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2770 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 1, 8, \
2771 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2772 T(ts_tso_noff, 0, 1, 1, 1, 0, 0, 0, 8, \
2773 TSP_F | TSO_F | NOFF_F) \
2774 T(ts_tso_noff_l3l4csum, 0, 1, 1, 1, 0, 0, 1, 8, \
2775 TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2776 T(ts_tso_noff_ol3ol4csum, 0, 1, 1, 1, 0, 1, 0, 8, \
2777 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2778 T(ts_tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 0, 1, 1, 8, \
2779 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2780 T(ts_tso_noff_vlan, 0, 1, 1, 1, 1, 0, 0, 8, \
2781 TSP_F | TSO_F | NOFF_F | VLAN_F) \
2782 T(ts_tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 1, 0, 1, 8, \
2783 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2784 T(ts_tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 1, 0, 8, \
2785 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2786 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 1, 8, \
2787 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2788 T(sec, 1, 0, 0, 0, 0, 0, 0, 4, \
2790 T(sec_l3l4csum, 1, 0, 0, 0, 0, 0, 1, 4, \
2791 T_SEC_F | L3L4CSUM_F) \
2792 T(sec_ol3ol4csum, 1, 0, 0, 0, 0, 1, 0, 4, \
2793 T_SEC_F | OL3OL4CSUM_F) \
2794 T(sec_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 0, 1, 1, 4, \
2795 T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2796 T(sec_vlan, 1, 0, 0, 0, 1, 0, 0, 6, \
2798 T(sec_vlan_l3l4csum, 1, 0, 0, 0, 1, 0, 1, 6, \
2799 T_SEC_F | VLAN_F | L3L4CSUM_F) \
2800 T(sec_vlan_ol3ol4csum, 1, 0, 0, 0, 1, 1, 0, 6, \
2801 T_SEC_F | VLAN_F | OL3OL4CSUM_F) \
2802 T(sec_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 1, 6, \
2803 T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2804 T(sec_noff, 1, 0, 0, 1, 0, 0, 0, 4, \
2806 T(sec_noff_l3l4csum, 1, 0, 0, 1, 0, 0, 1, 4, \
2807 T_SEC_F | NOFF_F | L3L4CSUM_F) \
2808 T(sec_noff_ol3ol4csum, 1, 0, 0, 1, 0, 1, 0, 4, \
2809 T_SEC_F | NOFF_F | OL3OL4CSUM_F) \
2810 T(sec_noff_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 0, 1, 1, 4, \
2811 T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2812 T(sec_noff_vlan, 1, 0, 0, 1, 1, 0, 0, 6, \
2813 T_SEC_F | NOFF_F | VLAN_F) \
2814 T(sec_noff_vlan_l3l4csum, 1, 0, 0, 1, 1, 0, 1, 6, \
2815 T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2816 T(sec_noff_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 1, 0, 6, \
2817 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2818 T(sec_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 1, 6, \
2819 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2820 T(sec_tso, 1, 0, 1, 0, 0, 0, 0, 6, \
2822 T(sec_tso_l3l4csum, 1, 0, 1, 0, 0, 0, 1, 6, \
2823 T_SEC_F | TSO_F | L3L4CSUM_F) \
2824 T(sec_tso_ol3ol4csum, 1, 0, 1, 0, 0, 1, 0, 6, \
2825 T_SEC_F | TSO_F | OL3OL4CSUM_F) \
2826 T(sec_tso_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 0, 1, 1, 6, \
2827 T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2828 T(sec_tso_vlan, 1, 0, 1, 0, 1, 0, 0, 6, \
2829 T_SEC_F | TSO_F | VLAN_F) \
2830 T(sec_tso_vlan_l3l4csum, 1, 0, 1, 0, 1, 0, 1, 6, \
2831 T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2832 T(sec_tso_vlan_ol3ol4csum, 1, 0, 1, 0, 1, 1, 0, 6, \
2833 T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2834 T(sec_tso_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 1, 6, \
2835 T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2836 T(sec_tso_noff, 1, 0, 1, 1, 0, 0, 0, 6, \
2837 T_SEC_F | TSO_F | NOFF_F) \
2838 T(sec_tso_noff_l3l4csum, 1, 0, 1, 1, 0, 0, 1, 6, \
2839 T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2840 T(sec_tso_noff_ol3ol4csum, 1, 0, 1, 1, 0, 1, 0, 6, \
2841 T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2842 T(sec_tso_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 0, 1, 1, 6, \
2843 T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2844 T(sec_tso_noff_vlan, 1, 0, 1, 1, 1, 0, 0, 6, \
2845 T_SEC_F | TSO_F | NOFF_F | VLAN_F) \
2846 T(sec_tso_noff_vlan_l3l4csum, 1, 0, 1, 1, 1, 0, 1, 6, \
2847 T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2848 T(sec_tso_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 1, 0, 6, \
2849 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2850 T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 1, 6, \
2851 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2852 T(sec_ts, 1, 1, 0, 0, 0, 0, 0, 8, \
2854 T(sec_ts_l3l4csum, 1, 1, 0, 0, 0, 0, 1, 8, \
2855 T_SEC_F | TSP_F | L3L4CSUM_F) \
2856 T(sec_ts_ol3ol4csum, 1, 1, 0, 0, 0, 1, 0, 8, \
2857 T_SEC_F | TSP_F | OL3OL4CSUM_F) \
2858 T(sec_ts_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 0, 1, 1, 8, \
2859 T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2860 T(sec_ts_vlan, 1, 1, 0, 0, 1, 0, 0, 8, \
2861 T_SEC_F | TSP_F | VLAN_F) \
2862 T(sec_ts_vlan_l3l4csum, 1, 1, 0, 0, 1, 0, 1, 8, \
2863 T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F) \
2864 T(sec_ts_vlan_ol3ol4csum, 1, 1, 0, 0, 1, 1, 0, 8, \
2865 T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \
2866 T(sec_ts_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 1, 8, \
2867 T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2868 T(sec_ts_noff, 1, 1, 0, 1, 0, 0, 0, 8, \
2869 T_SEC_F | TSP_F | NOFF_F) \
2870 T(sec_ts_noff_l3l4csum, 1, 1, 0, 1, 0, 0, 1, 8, \
2871 T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F) \
2872 T(sec_ts_noff_ol3ol4csum, 1, 1, 0, 1, 0, 1, 0, 8, \
2873 T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \
2874 T(sec_ts_noff_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 0, 1, 1, 8, \
2875 T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2876 T(sec_ts_noff_vlan, 1, 1, 0, 1, 1, 0, 0, 8, \
2877 T_SEC_F | TSP_F | NOFF_F | VLAN_F) \
2878 T(sec_ts_noff_vlan_l3l4csum, 1, 1, 0, 1, 1, 0, 1, 8, \
2879 T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2880 T(sec_ts_noff_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 1, 0, 8, \
2881 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2882 T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 1, 8, \
2883 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2884 T(sec_ts_tso, 1, 1, 1, 0, 0, 0, 0, 8, \
2885 T_SEC_F | TSP_F | TSO_F) \
2886 T(sec_ts_tso_l3l4csum, 1, 1, 1, 0, 0, 0, 1, 8, \
2887 T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F) \
2888 T(sec_ts_tso_ol3ol4csum, 1, 1, 1, 0, 0, 1, 0, 8, \
2889 T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F) \
2890 T(sec_ts_tso_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 0, 1, 1, 8, \
2891 T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2892 T(sec_ts_tso_vlan, 1, 1, 1, 0, 1, 0, 0, 8, \
2893 T_SEC_F | TSP_F | TSO_F | VLAN_F) \
2894 T(sec_ts_tso_vlan_l3l4csum, 1, 1, 1, 0, 1, 0, 1, 8, \
2895 T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2896 T(sec_ts_tso_vlan_ol3ol4csum, 1, 1, 1, 0, 1, 1, 0, 8, \
2897 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2898 T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 1, 8, \
2899 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2900 T(sec_ts_tso_noff, 1, 1, 1, 1, 0, 0, 0, 8, \
2901 T_SEC_F | TSP_F | TSO_F | NOFF_F) \
2902 T(sec_ts_tso_noff_l3l4csum, 1, 1, 1, 1, 0, 0, 1, 8, \
2903 T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2904 T(sec_ts_tso_noff_ol3ol4csum, 1, 1, 1, 1, 0, 1, 0, 8, \
2905 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2906 T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 0, 1, 1, 8, \
2907 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2908 T(sec_ts_tso_noff_vlan, 1, 1, 1, 1, 1, 0, 0, 8, \
2909 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \
2910 T(sec_ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 1, 0, 1, 8, \
2911 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2912 T(sec_ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 1, 0, 8, \
2913 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)\
2914 T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 1, 8, \
2915 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
/* Emit the forward declarations for all four Tx fast-path variants of
 * one offload combination: scalar, scalar multi-seg, vector, and vector
 * multi-seg. Instantiated once for every row of NIX_TX_FASTPATH_MODES;
 * the corresponding definitions live in the per-variant .c files.
 */
#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                         \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \

NIX_TX_FASTPATH_MODES
2934 #endif /* __CN10K_TX_H__ */