/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
/* Flags to control the xmit_prepare function.
 * Defined from the top end of the flag space to denote
 * that they are not used as offload flags when picking
 * a transmit function.
 */
#define NIX_TX_VWQE_F	   BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |        \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |               \
	 NIX_TX_OFFLOAD_TSO_F)
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update fc_cache_pkts */                \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply by sqes_per_sqb to express in pkts */      \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check again for room */                             \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
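/* Worked example (illustrative values, not from the original source): with
 * nb_sqb_bufs_adj = 512, *fc_mem = 128 SQBs consumed and
 * sqes_per_sqb_log2 = 5 (32 SQEs per SQB), the refreshed cache would be
 * (512 - 128) << 5 = 12288 packets worth of room.
 */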
/* Encoded number of segments to number of dwords macro; each value of
 * nb_segs is encoded as 4 bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
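/* Worked example of the encoding above: nibble N of NIX_SEGDW_MAGIC holds
 * the dword count for N segments, where a "dword" here is a 16B unit (one
 * 8B SG header plus up to three 8B IOVAs rounds up to two such units).
 * E.g. NIX_NB_SEGS_TO_SEGDW(3) = (0x76654432210ULL >> 12) & 0xF = 2, while
 * NIX_NB_SEGS_TO_SEGDW(4) = 3 since a second SG header is then needed.
 */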
/* Function to determine the number of Tx sub-descriptors required when the
 * extension sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		       ? 2
		       : ((flags &
			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
				  ? 1
				  : 0);
}
static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ?
			     ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
			     4;
}
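/* Sketch of where those dword counts come from (grounded in the skeleton
 * below): SEND_HDR is 2 dwords and SG + IOVA are 2 more, giving 4 for the
 * base case; NIX_TX_NEED_EXT_HDR adds a 2-dword SEND_EXT (6 total) and
 * TSTAMP adds a further 2-dword SEND_MEM (8 total). The same 4/6/8 sizes
 * appear in the NIX_TX_FASTPATH_MODES table at the end of this file.
 */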
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ?
			      (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
			      4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags)
{
	/* Send hdr */
	cmd[0] = txq->send_hdr_w0;

	/* Send ext if present */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload length by the base header length */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & PKT_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       PKT_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & PKT_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
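/* Worked example (illustrative, not from the original source): for a plain
 * TCPv4 packet with l2_len = 14, l3_len = 20 and l4_len = 20 and no outer
 * flags, mask is 0, so lso_sb = 54 and paylen = pkt_len - 54; iplen then
 * points at the IPv4 total-length field at offset 14 + 2. For tunnels,
 * (ol_flags & PKT_TX_TUNNEL_MASK) >> 45 yields the tunnel type index
 * (e.g. 1 for PKT_TX_TUNNEL_VXLAN), and CNXK_NIX_UDP_TUN_BITMASK is
 * assumed to carry one bit per UDP-based tunnel type such as VXLAN.
 */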
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
		       const uint16_t flags, const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}
	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		send_hdr->w0.total = m->data_len;
		send_hdr->w0.aura =
			roc_npa_aura_handle_to_aura(m->pool->pool_id);
	}

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment by 1 if it is IPV4, as 3 denotes IPV4 with csum */
		w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

		/* When there is no tunnel header, shift the IL3/IL4 fields
		 * into the OL3/OL4 positions so that OL3/OL4 are used for
		 * the header checksum.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
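		/* Sketch of the shift above (assuming the W1 layout used
		 * here: ptr bytes in bits 31:0, type nibbles in bits 63:32):
		 * when mask = 1 (no outer header), the upper half shifts
		 * right by 8 so il3type/il4type land in ol3type/ol4type,
		 * and the lower half shifts right by 16 so il3ptr/il4ptr
		 * land in ol3ptr/ol4ptr; with mask = 0, w1 is unchanged.
		 */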
	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment by 1 if it is IPV4, as 3 denotes IPV4 with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);
	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Outer L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment by 1 if it is IPV4, as 3 denotes IPV4 with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
	}
	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}
	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;
	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 *	mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);

			/* Mark mempool object as "put" since it is freed by
			 * NIX
			 */
			if (!send_hdr->w0.df)
				__mempool_check_cookies(m->pool, (void **)&m,
							1, 0);
		}
	}

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be on the stack. The intent is
	 * that 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}
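/* Resulting LMT line layout for the single-seg case (a sketch based on the
 * 16B strides above): SEND_HDR at offset 0, optional SEND_EXT at 16, then
 * SG + IOVA in the next 16B; with TSTAMP enabled, a SEND_MEM sub-descriptor
 * is appended by cn10k_nix_xmit_prepare_tstamp() below.
 */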
static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
		struct nix_send_ext_s *send_hdr_ext =
			(struct nix_send_ext_s *)lmt_addr + 16;
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
		send_hdr_ext->w0.tstmp = 1;
		if (flags & NIX_TX_MULTI_SEG_F) {
			/* Retrieving the default desc values */
			lmt[off] = cmd[2];

			/* Use a compiler barrier to avoid violating
			 * C aliasing rules.
			 */
			rte_compiler_barrier();
		}

		/* For packets where PKT_TX_IEEE1588_TMST is not set, the Tx
		 * tstamp should not be recorded; hence change the alg type to
		 * NIX_SENDMEMALG_SET and move the send mem addr field to the
		 * next 8 bytes so it does not corrupt the registered Tx
		 * tstamp address.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
	}
}
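/* Note on the arithmetic above (an assumption about the enum layout, not
 * stated in this file): NIX_SENDMEMALG_SETTSTMP - 1 is taken to be
 * NIX_SENDMEMALG_SET, so subtracting is_ol_tstamp flips the algorithm while
 * the +is_ol_tstamp on the address redirects the write to a scratch slot.
 */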
static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[3 + off];

	i = 0;
	nb_segs = m->nb_segs;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
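/* Worked example (illustrative): a 3-segment mbuf with no extension header
 * emits one SG header dword plus three IOVA dwords, so the SG region spans
 * 4 dwords; (4 >> 1) + (4 & 1) = 2 16B units, plus 1 unit for SEND_HDR,
 * gives segdw = 3, i.e. sizem1 = 2 and a 48B descriptor.
 */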
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		    uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uintptr_t pa, lmt_addr = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uint64_t lso_tun_fmt;
	uint64_t data;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}

	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags);

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, 4, flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
	}

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}
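/* Sketch of the STEORL encoding used above (inferred from this code, not a
 * spec quote): bits 6:4 of the I/O address carry the per-line size field
 * from cn10k_nix_tx_steor_data(), the submit word carries the LMT ID in its
 * low bits and "lines - 1" at bits 14:12, and bursts above 16 lines are
 * split into two STEORLs (lmt_id and lmt_id + 16).
 */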
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t pkts, uint64_t *cmd, uintptr_t base,
			 const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint64_t data0, data1;
	uint64_t lso_tun_fmt;
	__uint128_t data128;
	uint16_t shft;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, segdw,
					      flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		data128 |= (((__uint128_t)(segdw - 1)) << shft);
		shft += 3;
	}

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;

	/* Trigger LMTST */
	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		/* Move lmtst17..31 sz to bits 63:19 */
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}
#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		      const uint64_t flags, const uint64_t lso_tun_fmt)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & PKT_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & PKT_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
			0x1;
		uint8_t shift = is_udp_tun ? 32 : 0;

		shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
		shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format = (lso_tun_fmt >> shift);
	}
}
static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
				union nix_send_hdr_w0_u *sh,
				union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif

	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
}
static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			   uint64x2_t *cmd1, const uint8_t segdw,
			   const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;

	if (m->nb_segs == 1) {
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		return;
	}

	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	sh.sizem1 = segdw - 1;
	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}
#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
			       uint64x2_t *cmd1, uint64x2_t *cmd2,
			       uint64x2_t *cmd3, uint8_t *segdw,
			       uint64_t *lmt_addr, __uint128_t *data128,
			       uint8_t *shift, const uint16_t flags)
{
	uint8_t j, off, lmt_used;

	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
		/* No segments in 4 consecutive packets. */
		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
			vst1q_u64(lmt_addr, cmd0[0]);
			vst1q_u64(lmt_addr + 2, cmd1[0]);
			vst1q_u64(lmt_addr + 4, cmd0[1]);
			vst1q_u64(lmt_addr + 6, cmd1[1]);
			vst1q_u64(lmt_addr + 8, cmd0[2]);
			vst1q_u64(lmt_addr + 10, cmd1[2]);
			vst1q_u64(lmt_addr + 12, cmd0[3]);
			vst1q_u64(lmt_addr + 14, cmd1[3]);

			*data128 |= ((__uint128_t)7) << *shift;
			*shift += 3;

			return 1;
		}
	}
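	/* Size accounting sketch: four 4-dword (2+2) packets fill one 128B
	 * LMT line, i.e. eight 16B units, and the per-line size field is
	 * encoded as units minus one — hence the literal 7 above.
	 */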
	lmt_used = 0;
	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
		/* Fit consecutive packets in same LMTLINE. */
		if ((segdw[j] + segdw[j + 1]) <= 8) {
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				/* TSTAMP takes 4 each, no segs. */
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				vst1q_u64(lmt_addr + 6, cmd3[j]);

				vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
				vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				/* EXT header takes 3 each, space for 2 segs.*/
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				off = segdw[j] - 3;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 12 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
				off = segdw[j] - 2;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 8 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
			}
			*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
				    << *shift;
			*shift += 3;
			j += 2;
		} else {
			if ((flags & NIX_TX_NEED_EXT_HDR) &&
			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				off = segdw[j] - 4;
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
			}
			*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
			*shift += 3;
			j++;
		}
		lmt_used++;
		lmt_addr += 16;
	}

	return lmt_used;
}
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint16_t left, scalar, burst, i, lmt_id;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t laddr = txq->lmt_base;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint8_t lnum, shift;
	union wdata {
		__uint128_t data128;
		uint64_t data[2];
	} wd;
	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	} else {
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
	}

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		for (i = 0; i < pkts; i++)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	senddesc23_w0 = senddesc01_w0;
	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
	sgdesc23_w0 = sgdesc01_w0;

	/* Load command defaults into vector variables. */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
		sendext23_w0 = sendext01_w0;
		sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		sendext23_w1 = sendext01_w1;
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
			sendmem23_w1 = sendmem01_w1;
		}
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
	left = pkts;
again:
	/* Number of packets to prepare depends on offloads enabled. */
	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
			      cn10k_nix_pkts_per_vec_brst(flags) :
			      left;
	if (flags & NIX_TX_MULTI_SEG_F) {
		wd.data128 = 0;
		shift = 16;
	}
	lnum = 0;

	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
		if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
				struct rte_mbuf *m = tx_pkts[j];

				/* Get dwords based on nb_segs. */
				segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
				/* Add dwords based on offloads. */
				segdw[j] += 1 + /* SEND HDR */
					    !!(flags & NIX_TX_NEED_EXT_HDR) +
					    !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
			}

			/* Check if there are enough LMTLINES for this loop */
			if (lnum + 4 > 32) {
				uint8_t ldwords_con = 0, lneeded = 0;
				for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
					ldwords_con += segdw[j];
					if (ldwords_con > 8) {
						lneeded += 1;
						ldwords_con = segdw[j];
					}
				}
				lneeded += 1;
				if (lnum + lneeded > 32) {
					burst = i;
					break;
				}
			}
		}
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB */
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			/* Clear the LSO enable bit. */
			sendext01_w0 = vbicq_u64(sendext01_w0,
						 vdupq_n_u64(BIT_ULL(14)));
			sendext23_w0 = sendext01_w0;
		}
		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbufs' ol_flags, iova, pktlen, dataoff:
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbuf pointers to the pool field */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */
			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}
		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen present at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* ORR both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));
		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
				0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
				0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
				0x02, /* PKT_TX_IPV4 */
				0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
				0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
				0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
				0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_TCP_CKSUM
				       */
				0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_SCTP_CKSUM
				       */
				0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and the rest
			 * of the bits left intact.
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);
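			/* Worked example (illustrative; bit positions assume
			 * the mbuf ol_flags layout where the L4 type sits at
			 * bits 53:52, PKT_TX_IP_CKSUM at bit 54 and
			 * PKT_TX_IPV4 at bit 55): PKT_TX_IPV4 |
			 * PKT_TX_TCP_CKSUM forms index 0b1001 = 9, and
			 * tbl[9] = 0x12, i.e. il4type = TCP csum and
			 * il3type = IPv4 without csum.
			 */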
			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):OL3_LEN(8):OL2_LEN(z+7)
			 * E(48):OL3_LEN(8):OL2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
					0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
					0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
					0x03, /* PKT_TX_IP_CKSUM */
					0x13, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
					0x02, /* PKT_TX_IPV4 */
					0x12, /* PKT_TX_IPV4 |
					       * PKT_TX_TCP_CKSUM
					       */
					0x22, /* PKT_TX_IPV4 |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x32, /* PKT_TX_IPV4 |
					       * PKT_TX_UDP_CKSUM
					       */
					0x03, /* PKT_TX_IPV4 |
					       * PKT_TX_IP_CKSUM
					       */
					0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype and
			 * Bit 56:63 of oltype and place it in corresponding
			 * place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}
		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};

			/* Load vlan values from packet. Outer is VLAN 0. */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}
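		/* Resulting SEND_EXT W1 layout (a sketch from the constants
		 * above): vlan0 (outer) TCI sits at bits 23:8 with its insert
		 * enable at bit 48, vlan1 (inner) TCI at bits 47:32 with its
		 * enable at bit 49; both insert pointers default to byte 12,
		 * matching the scalar path in cn10k_nix_xmit_prepare().
		 */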
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
						PKT_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate an
			 * inverted mask, as we need not make any changes to
			 * the default cmd value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));

			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}
		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			const uint64_t lso_fmt = txq->lso_tun_fmt;
			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
			uint64_t sd_w1[NIX_DESCS_PER_LOOP];

			/* Extract SD W1 as we need to set L4 types. */
			vst1q_u64(sd_w1, senddesc01_w1);
			vst1q_u64(sd_w1 + 2, senddesc23_w1);

			/* Extract SX W0 as we need to set LSO fields. */
			vst1q_u64(sx_w0, sendext01_w0);
			vst1q_u64(sx_w0 + 2, sendext23_w0);

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Prepare individual mbufs. */
			cn10k_nix_prepare_tso(tx_pkts[0],
				(union nix_send_hdr_w1_u *)&sd_w1[0],
				(union nix_send_ext_w0_u *)&sx_w0[0],
				vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[1],
				(union nix_send_hdr_w1_u *)&sd_w1[1],
				(union nix_send_ext_w0_u *)&sx_w0[1],
				vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[2],
				(union nix_send_hdr_w1_u *)&sd_w1[2],
				(union nix_send_ext_w0_u *)&sx_w0[2],
				vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[3],
				(union nix_send_hdr_w1_u *)&sd_w1[3],
				(union nix_send_ext_w0_u *)&sx_w0[3],
				vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);

			senddesc01_w1 = vld1q_u64(sd_w1);
			senddesc23_w1 = vld1q_u64(sd_w1 + 2);

			sendext01_w0 = vld1q_u64(sx_w0);
			sendext23_w0 = vld1q_u64(sx_w0 + 2);
		}
		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
		    !(flags & NIX_TX_MULTI_SEG_F)) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else if (!(flags & NIX_TX_MULTI_SEG_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}
		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}
		if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			segdw[4] = 8;
			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
							   cmd2, cmd3, segdw,
							   (uint64_t *)
							   LMT_OFF(laddr, lnum,
								   0),
							   &wd.data128, &shift,
							   flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	if (flags & NIX_TX_MULTI_SEG_F)
		wd.data[0] >>= 16;

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	left -= burst;

	/* Trigger LMTSTs */
	if (lnum > 16) {
		if (!(flags & NIX_TX_MULTI_SEG_F))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & NIX_TX_MULTI_SEG_F)
			wd.data[0] <<= 16;

		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & NIX_TX_MULTI_SEG_F))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;

		if (flags & NIX_TX_MULTI_SEG_F)
			wd.data[1] <<= 16;

		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & NIX_TX_MULTI_SEG_F))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & NIX_TX_MULTI_SEG_F)
			wd.data[0] <<= 16;

		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
							 scalar, cmd, base,
							 flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
						    cmd, base, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(base);
	RTE_SET_USED(flags);
	return 0;
}
#endif
#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F

/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES                                                  \
T(no_offload,				0, 0, 0, 0, 0, 0, 4,                   \
		NIX_TX_OFFLOAD_NONE)                                           \
T(l3l4csum,				0, 0, 0, 0, 0, 1, 4,                   \
		L3L4CSUM_F)                                                    \
T(ol3ol4csum,				0, 0, 0, 0, 1, 0, 4,                   \
		OL3OL4CSUM_F)                                                  \
T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 1, 1, 4,                   \
		OL3OL4CSUM_F | L3L4CSUM_F)                                     \
T(vlan,					0, 0, 0, 1, 0, 0, 6,                   \
		VLAN_F)                                                        \
T(vlan_l3l4csum,			0, 0, 0, 1, 0, 1, 6,                   \
		VLAN_F | L3L4CSUM_F)                                           \
T(vlan_ol3ol4csum,			0, 0, 0, 1, 1, 0, 6,                   \
		VLAN_F | OL3OL4CSUM_F)                                         \
T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 1, 1, 6,                   \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                            \
T(noff,					0, 0, 1, 0, 0, 0, 4,                   \
		NOFF_F)                                                        \
T(noff_l3l4csum,			0, 0, 1, 0, 0, 1, 4,                   \
		NOFF_F | L3L4CSUM_F)                                           \
T(noff_ol3ol4csum,			0, 0, 1, 0, 1, 0, 4,                   \
		NOFF_F | OL3OL4CSUM_F)                                         \
T(noff_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1, 4,                   \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                            \
T(noff_vlan,				0, 0, 1, 1, 0, 0, 6,                   \
		NOFF_F | VLAN_F)                                               \
T(noff_vlan_l3l4csum,			0, 0, 1, 1, 0, 1, 6,                   \
		NOFF_F | VLAN_F | L3L4CSUM_F)                                  \
T(noff_vlan_ol3ol4csum,			0, 0, 1, 1, 1, 0, 6,                   \
		NOFF_F | VLAN_F | OL3OL4CSUM_F)                                \
T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1, 6,                   \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                   \
T(tso,					0, 1, 0, 0, 0, 0, 6,                   \
		TSO_F)                                                         \
T(tso_l3l4csum,				0, 1, 0, 0, 0, 1, 6,                   \
		TSO_F | L3L4CSUM_F)                                            \
T(tso_ol3ol4csum,			0, 1, 0, 0, 1, 0, 6,                   \
		TSO_F | OL3OL4CSUM_F)                                          \
T(tso_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1, 6,                   \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                             \
T(tso_vlan,				0, 1, 0, 1, 0, 0, 6,                   \
		TSO_F | VLAN_F)                                                \
T(tso_vlan_l3l4csum,			0, 1, 0, 1, 0, 1, 6,                   \
		TSO_F | VLAN_F | L3L4CSUM_F)                                   \
T(tso_vlan_ol3ol4csum,			0, 1, 0, 1, 1, 0, 6,                   \
		TSO_F | VLAN_F | OL3OL4CSUM_F)                                 \
T(tso_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 1, 6,                   \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(tso_noff,				0, 1, 1, 0, 0, 0, 6,                   \
		TSO_F | NOFF_F)                                                \
T(tso_noff_l3l4csum,			0, 1, 1, 0, 0, 1, 6,                   \
		TSO_F | NOFF_F | L3L4CSUM_F)                                   \
T(tso_noff_ol3ol4csum,			0, 1, 1, 0, 1, 0, 6,                   \
		TSO_F | NOFF_F | OL3OL4CSUM_F)                                 \
T(tso_noff_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 1, 1, 6,                   \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(tso_noff_vlan,			0, 1, 1, 1, 0, 0, 6,                   \
		TSO_F | NOFF_F | VLAN_F)                                       \
T(tso_noff_vlan_l3l4csum,		0, 1, 1, 1, 0, 1, 6,                   \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                          \
T(tso_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 0, 6,                   \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                        \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1, 6,                   \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(ts,					1, 0, 0, 0, 0, 0, 8,                   \
		TSP_F)                                                         \
T(ts_l3l4csum,				1, 0, 0, 0, 0, 1, 8,                   \
		TSP_F | L3L4CSUM_F)                                            \
T(ts_ol3ol4csum,			1, 0, 0, 0, 1, 0, 8,                   \
		TSP_F | OL3OL4CSUM_F)                                          \
T(ts_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1, 8,                   \
		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                             \
T(ts_vlan,				1, 0, 0, 1, 0, 0, 8,                   \
		TSP_F | VLAN_F)                                                \
T(ts_vlan_l3l4csum,			1, 0, 0, 1, 0, 1, 8,                   \
		TSP_F | VLAN_F | L3L4CSUM_F)                                   \
T(ts_vlan_ol3ol4csum,			1, 0, 0, 1, 1, 0, 8,                   \
		TSP_F | VLAN_F | OL3OL4CSUM_F)                                 \
T(ts_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 1, 8,                   \
		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(ts_noff,				1, 0, 1, 0, 0, 0, 8,                   \
		TSP_F | NOFF_F)                                                \
T(ts_noff_l3l4csum,			1, 0, 1, 0, 0, 1, 8,                   \
		TSP_F | NOFF_F | L3L4CSUM_F)                                   \
T(ts_noff_ol3ol4csum,			1, 0, 1, 0, 1, 0, 8,                   \
		TSP_F | NOFF_F | OL3OL4CSUM_F)                                 \
T(ts_noff_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 1, 1, 8,                   \
		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(ts_noff_vlan,				1, 0, 1, 1, 0, 0, 8,                   \
		TSP_F | NOFF_F | VLAN_F)                                       \
T(ts_noff_vlan_l3l4csum,		1, 0, 1, 1, 0, 1, 8,                   \
		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                          \
T(ts_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 0, 8,                   \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                        \
T(ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1, 1, 8,                   \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(ts_tso,				1, 1, 0, 0, 0, 0, 8,                   \
		TSP_F | TSO_F)                                                 \
T(ts_tso_l3l4csum,			1, 1, 0, 0, 0, 1, 8,                   \
		TSP_F | TSO_F | L3L4CSUM_F)                                    \
T(ts_tso_ol3ol4csum,			1, 1, 0, 0, 1, 0, 8,                   \
		TSP_F | TSO_F | OL3OL4CSUM_F)                                  \
T(ts_tso_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 1, 1, 8,                   \
		TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(ts_tso_vlan,				1, 1, 0, 1, 0, 0, 8,                   \
		TSP_F | TSO_F | VLAN_F)                                        \
T(ts_tso_vlan_l3l4csum,			1, 1, 0, 1, 0, 1, 8,                   \
		TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                           \
T(ts_tso_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 0, 8,                   \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                         \
T(ts_tso_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1, 8,                   \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(ts_tso_noff,				1, 1, 1, 0, 0, 0, 8,                   \
		TSP_F | TSO_F | NOFF_F)                                        \
T(ts_tso_noff_l3l4csum,			1, 1, 1, 0, 0, 1, 8,                   \
		TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                           \
T(ts_tso_noff_ol3ol4csum,		1, 1, 1, 0, 1, 0, 8,                   \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                         \
T(ts_tso_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1, 8,                   \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(ts_tso_noff_vlan,			1, 1, 1, 1, 0, 0, 8,                   \
		TSP_F | TSO_F | NOFF_F | VLAN_F)                               \
T(ts_tso_noff_vlan_l3l4csum,		1, 1, 1, 1, 0, 1, 8,                   \
		TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                  \
T(ts_tso_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 1, 0, 8,                   \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1, 8,                   \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
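/* For reference: each row of NIX_TX_FASTPATH_MODES expands, through the T()
 * definition above, into four fast-path declarations; e.g. the
 * vlan_l3l4csum row yields cn10k_nix_xmit_pkts_vlan_l3l4csum,
 * cn10k_nix_xmit_pkts_mseg_vlan_l3l4csum,
 * cn10k_nix_xmit_pkts_vec_vlan_l3l4csum and
 * cn10k_nix_xmit_pkts_vec_mseg_vlan_l3l4csum.
 */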
NIX_TX_FASTPATH_MODES
#undef T
#endif /* __CN10K_TX_H__ */