1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #define NIX_TX_OFFLOAD_NONE (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
17 /* Flags to control xmit_prepare function.
18  * Defined from the top (MSB) downwards to denote that they are
19  * not offload flags used to pick the Tx function.
20  */
21 #define NIX_TX_VWQE_F BIT(14)
22 #define NIX_TX_MULTI_SEG_F BIT(15)
24 #define NIX_TX_NEED_SEND_HDR_W1 \
25 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
26 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
28 #define NIX_TX_NEED_EXT_HDR \
29 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
32 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
34 /* Cached value is low, update fc_cache_pkts */ \
35 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
36 /* Multiply with sqe_per_sqb to express in pkts */ \
37 (txq)->fc_cache_pkts = \
38 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
39 << (txq)->sqes_per_sqb_log2; \
40 /* Check again whether there is room */ \
41 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
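/* Illustrative numbers for the flow-control math above (hypothetical, not
 * from the driver): with nb_sqb_bufs_adj = 512, *fc_mem = 500 consumed SQBs
 * and sqes_per_sqb_log2 = 3 (8 SQEs per SQB), the refreshed cache is
 * (512 - 500) << 3 = 96 packets' worth of room.
 */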
46 /* Macro encoding the number of segments to the number of dwords required;
47  * each value of nb_segs is encoded as 4 bits.
48  */
49 #define NIX_SEGDW_MAGIC 0x76654432210ULL
51 #define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
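/* Illustrative decode of NIX_SEGDW_MAGIC: nibble n holds the number of 16B
 * dword pairs needed for an n-segment chain, i.e. one SG header dword plus
 * up to three IOVA dwords per SG, rounded up to an even dword count.
 * For example:
 *   NIX_NB_SEGS_TO_SEGDW(1) = (0x76654432210ULL >> 4) & 0xF = 1
 *   NIX_NB_SEGS_TO_SEGDW(3) = (0x76654432210ULL >> 12) & 0xF = 2
 *   NIX_NB_SEGS_TO_SEGDW(4) = (0x76654432210ULL >> 16) & 0xF = 3
 */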
53 #define LMT_OFF(lmt_addr, lmt_num, offset) \
54 (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
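/* Example, assuming ROC_LMT_LINE_SIZE_LOG2 == 7 (128B LMT lines):
 * LMT_OFF(base, 2, 16) == (void *)(base + 2 * 128 + 16), i.e. byte 16 of
 * the third LMT line belonging to this core.
 */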
56 /* Function to determine the number of Tx sub-descriptors required when the
57  * extended sub-descriptor is enabled.
58  */
59 static __rte_always_inline int
60 cn10k_nix_tx_ext_subs(const uint16_t flags)
62 return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
63 ? 2
64 : ((flags &
65 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
66 ? 1
67 : 0);
70 static __rte_always_inline uint8_t
71 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
73 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
74 << ROC_LMT_LINES_PER_CORE_LOG2;
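/* Example, assuming ROC_LMT_LINES_PER_CORE_LOG2 == 5 (32 LMT lines per
 * core): with an EXT header two packets fit per line, giving a burst of
 * 2 << 5 = 64; without it four fit per line, giving 4 << 5 = 128.
 */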
77 static __rte_always_inline uint8_t
78 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
80 return (flags & NIX_TX_NEED_EXT_HDR) ?
81 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
82 4;
85 static __rte_always_inline uint64_t
86 cn10k_nix_tx_steor_data(const uint16_t flags)
88 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
91 /* This will be moved to addr area */
93 /* 15 vector sizes for single seg */
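/* Sketch of the STEOR data layout as inferred from the submit code below:
 * each LMT line gets a 3-bit (dwords - 1) size. The first line's size
 * starts in bits 2:0 and is later moved into bits 6:4 of the LMT I/O
 * address via "(data & 0x7) << 4", while the sizes for lines 1..15 occupy
 * bits 21:19, 24:22 and so on up the word.
 */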
113 static __rte_always_inline uint8_t
114 cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
116 return ((flags & NIX_TX_NEED_EXT_HDR) ?
117 (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
118 4);
121 static __rte_always_inline uint64_t
122 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
124 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
127 /* This will be moved to addr area */
129 /* 15 vector sizes for single seg */
149 static __rte_always_inline void
150 cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
151 const uint16_t flags)
154 cmd[0] = txq->send_hdr_w0;
158 /* Send ext if present */
159 if (flags & NIX_TX_NEED_EXT_HDR) {
160 *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
169 static __rte_always_inline void
170 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
172 uint64_t mask, ol_flags = m->ol_flags;
174 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
175 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
176 uint16_t *iplen, *oiplen, *oudplen;
177 uint16_t lso_sb, paylen;
179 mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
180 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
181 m->l2_len + m->l3_len + m->l4_len;
183 /* Reduce payload len from base headers */
184 paylen = m->pkt_len - lso_sb;
186 /* Get iplen position assuming no tunnel hdr */
187 iplen = (uint16_t *)(mdata + m->l2_len +
188 (2 << !!(ol_flags & PKT_TX_IPV6)));
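/* 2 << !!IPv6 is the byte offset of the length field inside the L3 header:
 * the IPv4 total-length field sits at offset 2, the IPv6 payload-length
 * field at offset 4.
 */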
189 /* Handle tunnel tso */
190 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
191 (ol_flags & PKT_TX_TUNNEL_MASK)) {
192 const uint8_t is_udp_tun =
193 (CNXK_NIX_UDP_TUN_BITMASK >>
194 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
197 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
199 PKT_TX_OUTER_IPV6)));
200 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
203 /* Update format for UDP tunneled packet */
205 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
206 m->outer_l3_len + 4);
207 *oudplen = rte_cpu_to_be_16(
208 rte_be_to_cpu_16(*oudplen) - paylen);
211 /* Update iplen position to inner ip hdr */
212 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
214 (2 << !!(ol_flags & PKT_TX_IPV6)));
217 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
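/* Worked example (illustrative): a TCP/IPv4 packet with l2_len = 14,
 * l3_len = 20, l4_len = 20 and pkt_len = 1514 gives lso_sb = 54 and
 * paylen = 1460, so the stored IP length shrinks by 1460 and the LSO
 * engine re-adds each segment's actual payload when rewriting headers.
 */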
221 static __rte_always_inline void
222 cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
223 const uint16_t flags, const uint64_t lso_tun_fmt)
225 struct nix_send_ext_s *send_hdr_ext;
226 struct nix_send_hdr_s *send_hdr;
227 uint64_t ol_flags = 0, mask;
228 union nix_send_hdr_w1_u w1;
229 union nix_send_sg_s *sg;
231 send_hdr = (struct nix_send_hdr_s *)cmd;
232 if (flags & NIX_TX_NEED_EXT_HDR) {
233 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
234 sg = (union nix_send_sg_s *)(cmd + 4);
235 /* Clear previous markings */
236 send_hdr_ext->w0.lso = 0;
237 send_hdr_ext->w1.u = 0;
239 sg = (union nix_send_sg_s *)(cmd + 2);
242 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
243 ol_flags = m->ol_flags;
247 if (!(flags & NIX_TX_MULTI_SEG_F)) {
248 send_hdr->w0.total = m->data_len;
250 roc_npa_aura_handle_to_aura(m->pool->pool_id);
255 * 3 => IPV4 with csum
257 * L3type and L3ptr need to be set for either
258 * L3 csum or L4 csum or LSO.
262 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
263 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
264 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
265 const uint8_t ol3type =
266 ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
267 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
268 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
271 w1.ol3type = ol3type;
272 mask = 0xffffull << ((!!ol3type) << 4);
273 w1.ol3ptr = ~mask & m->outer_l2_len;
274 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
277 w1.ol4type = csum + (csum << 1);
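/* (!!ol3type) << 4 makes the shift 16 when an outer L3 type exists, so
 * ~mask keeps the low 16 bits holding the ptr values; with no outer L3
 * type the shift is 0 and ~mask clears them, leaving ol3ptr/ol4ptr zero.
 */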
280 w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
281 ((!!(ol_flags & PKT_TX_IPV6)) << 2);
282 w1.il3ptr = w1.ol4ptr + m->l2_len;
283 w1.il4ptr = w1.il3ptr + m->l3_len;
284 /* Increment it by 1 if it is IPv4, as type 3 is IPv4 with csum */
285 w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);
288 w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
290 /* In case there is no tunnel header, shift the
291 * IL3/IL4 fields down so that OL3/OL4 are used
292 * for the header checksum.
293 */
294 mask = !ol3type;
295 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
296 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
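/* A sketch of the w1 layout as inferred from these shifts: the four 8-bit
 * ptrs occupy the low 32 bits and the four 4-bit types the next 16 bits.
 * With mask == 1 (no outer header) the upper half moves right by 8 bits so
 * il3type/il4type land in ol3type/ol4type, and the lower half moves right
 * by 16 bits so il3ptr/il4ptr land in ol3ptr/ol4ptr.
 */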
298 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
299 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
300 const uint8_t outer_l2_len = m->outer_l2_len;
303 w1.ol3ptr = outer_l2_len;
304 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
305 /* Increment it by 1 if it is IPv4, as type 3 is IPv4 with csum */
306 w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
307 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
308 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
311 w1.ol4type = csum + (csum << 1);
313 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
314 const uint8_t l2_len = m->l2_len;
316 /* Always use OLXPTR and OLXTYPE when
317 * only one header is present.
318 */
321 w1.ol3ptr = l2_len;
322 w1.ol4ptr = l2_len + m->l3_len;
323 /* Increment it by 1 if it is IPv4, as type 3 is IPv4 with csum */
324 w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
325 ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
326 !!(ol_flags & PKT_TX_IP_CKSUM);
329 w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
332 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
333 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
334 /* HW will update ptr after vlan0 update */
335 send_hdr_ext->w1.vlan1_ins_ptr = 12;
336 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
338 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
339 /* 2B before end of l2 header */
340 send_hdr_ext->w1.vlan0_ins_ptr = 12;
341 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
344 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
348 mask = -(!w1.il3type);
349 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
351 send_hdr_ext->w0.lso_sb = lso_sb;
352 send_hdr_ext->w0.lso = 1;
353 send_hdr_ext->w0.lso_mps = m->tso_segsz;
354 send_hdr_ext->w0.lso_format =
355 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
356 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
358 /* Handle tunnel tso */
359 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
360 (ol_flags & PKT_TX_TUNNEL_MASK)) {
361 const uint8_t is_udp_tun =
362 (CNXK_NIX_UDP_TUN_BITMASK >>
363 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
365 uint8_t shift = is_udp_tun ? 32 : 0;
367 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
368 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
370 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
371 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
372 /* Update format for UDP tunneled packet */
373 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
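/* txq->lso_tun_fmt packs eight 8-bit LSO format indices, one per
 * (UDP-tunnel, outer-IPv6, inner-IPv6) combination; 'shift' selects the
 * byte: +32 for a UDP tunnel, +16 for outer IPv6, +8 for inner IPv6.
 */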
377 if (flags & NIX_TX_NEED_SEND_HDR_W1)
378 send_hdr->w1.u = w1.u;
380 if (!(flags & NIX_TX_MULTI_SEG_F)) {
381 sg->seg1_size = m->data_len;
382 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
384 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
385 /* DF bit = 1 if refcount of current mbuf or parent mbuf
386 * is greater than 1,
387 * DF bit = 0 otherwise
388 */
389 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
391 /* Mark mempool object as "put" since it is freed by NIX */
392 if (!send_hdr->w0.df)
393 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
396 /* With minimal offloads, 'cmd' being local could be optimized out to
397 * registers. In other cases, 'cmd' will be on the stack. The intent is
398 * that 'cmd' stores content from txq->cmd, which is copied only once.
399 */
400 *((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
402 if (flags & NIX_TX_NEED_EXT_HDR) {
403 *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
406 /* In case of multi-seg, sg template is stored here */
407 *((union nix_send_sg_s *)lmt_addr) = *sg;
408 *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
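/* After these stores one LMT line holds the whole descriptor: dwords 0-1
 * carry SEND_HDR, dwords 2-3 carry SEND_EXT when NIX_TX_NEED_EXT_HDR is
 * set, followed by the SG dword and the IOVA dword.
 */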
411 static __rte_always_inline void
412 cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
413 const uint64_t ol_flags, const uint16_t no_segdw,
414 const uint16_t flags)
416 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
417 const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
418 struct nix_send_ext_s *send_hdr_ext =
419 (struct nix_send_ext_s *)(lmt_addr + 16);
420 uint64_t *lmt = (uint64_t *)lmt_addr;
421 uint16_t off = (no_segdw - 1) << 1;
422 struct nix_send_mem_s *send_mem;
424 send_mem = (struct nix_send_mem_s *)(lmt + off);
425 send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
426 send_hdr_ext->w0.tstmp = 1;
427 if (flags & NIX_TX_MULTI_SEG_F) {
428 /* Retrieving the default desc values */
431 /* Using a compiler barrier to avoid violation of C
432 * aliasing rules.
433 */
434 rte_compiler_barrier();
437 /* Packets for which PKT_TX_IEEE1588_TMST is not set must not have their
438 * Tx tstamp recorded; hence change the alg type to
439 * NIX_SENDMEMALG_SET and also move the send mem addr field to the
440 * next 8 bytes so that the actual registered Tx tstamp is not corrupted.
441 */
443 send_mem->w0.subdc = NIX_SUBDC_MEM;
444 send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
446 (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
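/* When no timestamp was requested (is_ol_tstamp == 1), the alg becomes
 * NIX_SENDMEMALG_SETTSTMP - 1 (the SET alg per the comment above) and the
 * target address advances by one uint64_t, so the write lands in a scratch
 * word rather than the live Tx tstamp slot.
 */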
450 static __rte_always_inline uint16_t
451 cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
453 struct nix_send_hdr_s *send_hdr;
454 union nix_send_sg_s *sg;
455 struct rte_mbuf *m_next;
456 uint64_t *slist, sg_u;
461 send_hdr = (struct nix_send_hdr_s *)cmd;
462 send_hdr->w0.total = m->pkt_len;
463 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
465 if (flags & NIX_TX_NEED_EXT_HDR)
470 sg = (union nix_send_sg_s *)&cmd[2 + off];
471 /* Clear sg->u header before use */
472 sg->u &= 0xFC00000000000000;
474 slist = &cmd[3 + off];
477 nb_segs = m->nb_segs;
479 /* Fill mbuf segments */
482 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
483 *slist = rte_mbuf_data_iova(m);
484 /* Set invert df if buffer is not to be freed by H/W */
485 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
486 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
487 /* Mark mempool object as "put" since it is freed by NIX
488 */
489 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
490 if (!(sg_u & (1ULL << (i + 55))))
491 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
496 if (i > 2 && nb_segs) {
498 /* Next SG subdesc */
499 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
502 sg = (union nix_send_sg_s *)slist;
511 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
512 /* Roundup extra dwords to multiple of 2 */
513 segdw = (segdw >> 1) + (segdw & 0x1);
515 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
516 send_hdr->w0.sizem1 = segdw - 1;
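/* Worked example (illustrative): 5 segments with an EXT header (off = 2)
 * consume 1 SG dword + 3 IOVAs + 1 SG dword + 2 IOVAs = 7 dwords, which
 * round up to 4 16B units; adding (off >> 1) + 1 = 2 units for SEND_HDR
 * and SEND_EXT yields segdw = 6 and sizem1 = 5 (without a timestamp).
 */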
521 static __rte_always_inline uint16_t
522 cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
523 uint64_t *cmd, uintptr_t base, const uint16_t flags)
525 struct cn10k_eth_txq *txq = tx_queue;
526 const rte_iova_t io_addr = txq->io_addr;
527 uintptr_t pa, lmt_addr = txq->lmt_base;
528 uint16_t lmt_id, burst, left, i;
529 uint64_t lso_tun_fmt;
532 if (!(flags & NIX_TX_VWQE_F)) {
533 NIX_XMIT_FC_OR_RETURN(txq, pkts);
534 /* Reduce the cached count */
535 txq->fc_cache_pkts -= pkts;
538 /* Get cmd skeleton */
539 cn10k_nix_tx_skeleton(txq, cmd, flags);
541 if (flags & NIX_TX_OFFLOAD_TSO_F)
542 lso_tun_fmt = txq->lso_tun_fmt;
544 /* Get LMT base address and LMT ID as lcore id */
545 ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
548 burst = left > 32 ? 32 : left;
549 for (i = 0; i < burst; i++) {
550 /* Perform header writes for TSO; the barrier at
551 * lmt steorl will suffice.
552 */
553 if (flags & NIX_TX_OFFLOAD_TSO_F)
554 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
556 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
558 cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
559 tx_pkts[i]->ol_flags, 4, flags);
560 lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
563 if (flags & NIX_TX_VWQE_F)
564 roc_sso_hws_head_wait(base);
568 data = cn10k_nix_tx_steor_data(flags);
569 pa = io_addr | (data & 0x7) << 4;
571 data |= (15ULL << 12);
572 data |= (uint64_t)lmt_id;
575 roc_lmt_submit_steorl(data, pa);
577 data = cn10k_nix_tx_steor_data(flags);
578 pa = io_addr | (data & 0x7) << 4;
580 data |= ((uint64_t)(burst - 17)) << 12;
581 data |= (uint64_t)(lmt_id + 16);
584 roc_lmt_submit_steorl(data, pa);
586 data = cn10k_nix_tx_steor_data(flags);
587 pa = io_addr | (data & 0x7) << 4;
589 data |= ((uint64_t)(burst - 1)) << 12;
593 roc_lmt_submit_steorl(data, pa);
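/* A single LMTST flushes at most 16 LMT lines, so bursts of 17..32 packets
 * are issued as two STEORLs: the first covers 16 lines ((15ULL << 12)
 * being count minus one) starting at lmt_id, the second covers the
 * remaining burst - 16 lines starting at lmt_id + 16, hence the
 * (burst - 17) << 12 encoding.
 */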
599 /* Start processing another burst */
601 /* Reset lmt base addr */
602 lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
603 lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
610 static __rte_always_inline uint16_t
611 cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
612 uint16_t pkts, uint64_t *cmd, uintptr_t base,
613 const uint16_t flags)
615 struct cn10k_eth_txq *txq = tx_queue;
616 uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
617 const rte_iova_t io_addr = txq->io_addr;
618 uint16_t segdw, lmt_id, burst, left, i;
619 uint64_t data0, data1;
620 uint64_t lso_tun_fmt;
624 NIX_XMIT_FC_OR_RETURN(txq, pkts);
626 cn10k_nix_tx_skeleton(txq, cmd, flags);
628 /* Reduce the cached count */
629 txq->fc_cache_pkts -= pkts;
631 if (flags & NIX_TX_OFFLOAD_TSO_F)
632 lso_tun_fmt = txq->lso_tun_fmt;
634 /* Get LMT base address and LMT ID as lcore id */
635 ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
638 burst = left > 32 ? 32 : left;
641 for (i = 0; i < burst; i++) {
642 /* Perform header writes for TSO; the barrier at
643 * lmt steorl will suffice.
644 */
645 if (flags & NIX_TX_OFFLOAD_TSO_F)
646 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
648 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
650 /* Store sg list directly on lmt line */
651 segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
653 cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
654 tx_pkts[i]->ol_flags, segdw,
656 lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
657 data128 |= (((__uint128_t)(segdw - 1)) << shft);
661 if (flags & NIX_TX_VWQE_F)
662 roc_sso_hws_head_wait(base);
664 data0 = (uint64_t)data128;
665 data1 = (uint64_t)(data128 >> 64);
666 /* Make data0 similar to data1 */
670 pa0 = io_addr | (data0 & 0x7) << 4;
672 /* Move lmtst1..15 sz to bits 63:19 */
674 data0 |= (15ULL << 12);
675 data0 |= (uint64_t)lmt_id;
678 roc_lmt_submit_steorl(data0, pa0);
680 pa1 = io_addr | (data1 & 0x7) << 4;
683 data1 |= ((uint64_t)(burst - 17)) << 12;
684 data1 |= (uint64_t)(lmt_id + 16);
687 roc_lmt_submit_steorl(data1, pa1);
689 pa0 = io_addr | (data0 & 0x7) << 4;
691 /* Move lmtst1..15 sz to bits 63:19 */
693 data0 |= ((burst - 1) << 12);
694 data0 |= (uint64_t)lmt_id;
697 roc_lmt_submit_steorl(data0, pa0);
703 /* Start processing another burst */
705 /* Reset lmt base addr */
706 lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
707 lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
714 #if defined(RTE_ARCH_ARM64)
716 static __rte_always_inline void
717 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
718 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
719 const uint64_t flags, const uint64_t lso_tun_fmt)
724 if (!(ol_flags & PKT_TX_TCP_SEG))
727 mask = -(!w1->il3type);
728 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
732 w0->lso_mps = m->tso_segsz;
733 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
734 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
736 /* Handle tunnel tso */
737 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
738 (ol_flags & PKT_TX_TUNNEL_MASK)) {
739 const uint8_t is_udp_tun =
740 (CNXK_NIX_UDP_TUN_BITMASK >>
741 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
743 uint8_t shift = is_udp_tun ? 32 : 0;
745 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
746 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
748 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
749 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
750 /* Update format for UDP tunneled packet */
752 w0->lso_format = (lso_tun_fmt >> shift);
756 static __rte_always_inline void
757 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
758 union nix_send_hdr_w0_u *sh,
759 union nix_send_sg_s *sg, const uint32_t flags)
761 struct rte_mbuf *m_next;
762 uint64_t *slist, sg_u;
766 sh->total = m->pkt_len;
767 /* Clear sg->u header before use */
768 sg->u &= 0xFC00000000000000;
772 sg_u = sg_u | ((uint64_t)m->data_len);
774 nb_segs = m->nb_segs - 1;
777 /* Set invert df if buffer is not to be freed by H/W */
778 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
779 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
780 /* Mark mempool object as "put" since it is freed by NIX */
781 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
782 if (!(sg_u & (1ULL << 55)))
783 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
788 /* Fill mbuf segments */
791 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
792 *slist = rte_mbuf_data_iova(m);
793 /* Set invert df if buffer is not to be freed by H/W */
794 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
795 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
796 /* Mark mempool object as "put" since it is freed by NIX
797 */
798 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
799 if (!(sg_u & (1ULL << (i + 55))))
800 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
806 if (i > 2 && nb_segs) {
808 /* Next SG subdesc */
809 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
812 sg = (union nix_send_sg_s *)slist;
823 static __rte_always_inline void
824 cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
825 uint64x2_t *cmd1, const uint8_t segdw,
826 const uint32_t flags)
828 union nix_send_hdr_w0_u sh;
829 union nix_send_sg_s sg;
831 if (m->nb_segs == 1) {
832 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
833 sg.u = vgetq_lane_u64(cmd1[0], 0);
834 sg.u |= (cnxk_nix_prefree_seg(m) << 55);
835 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
838 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
839 sg.u = vgetq_lane_u64(cmd1[0], 0);
840 if (!(sg.u & (1ULL << 55)))
841 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
847 sh.u = vgetq_lane_u64(cmd0[0], 0);
848 sg.u = vgetq_lane_u64(cmd1[0], 0);
850 cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
852 sh.sizem1 = segdw - 1;
853 cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
854 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
857 #define NIX_DESCS_PER_LOOP 4
859 static __rte_always_inline uint8_t
860 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
861 uint64x2_t *cmd1, uint64x2_t *cmd2,
862 uint64x2_t *cmd3, uint8_t *segdw,
863 uint64_t *lmt_addr, __uint128_t *data128,
864 uint8_t *shift, const uint16_t flags)
866 uint8_t j, off, lmt_used;
868 if (!(flags & NIX_TX_NEED_EXT_HDR) &&
869 !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
870 /* No segments in 4 consecutive packets. */
871 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
872 for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
873 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
876 vst1q_u64(lmt_addr, cmd0[0]);
877 vst1q_u64(lmt_addr + 2, cmd1[0]);
878 vst1q_u64(lmt_addr + 4, cmd0[1]);
879 vst1q_u64(lmt_addr + 6, cmd1[1]);
880 vst1q_u64(lmt_addr + 8, cmd0[2]);
881 vst1q_u64(lmt_addr + 10, cmd1[2]);
882 vst1q_u64(lmt_addr + 12, cmd0[3]);
883 vst1q_u64(lmt_addr + 14, cmd1[3]);
885 *data128 |= ((__uint128_t)7) << *shift;
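/* Four 4-dword packets (SEND_HDR plus SG pair each) fill 16 dwords, i.e.
 * eight 16B units in this LMT line, hence the encoded size of 7 (= 8 - 1)
 * shifted into the line's 3-bit slot of data128.
 */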
893 for (j = 0; j < NIX_DESCS_PER_LOOP;) {
894 /* Fit consecutive packets in same LMTLINE. */
895 if ((segdw[j] + segdw[j + 1]) <= 8) {
896 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
897 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
900 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
903 segdw[j + 1], flags);
904 /* TSTAMP takes 4 each, no segs. */
905 vst1q_u64(lmt_addr, cmd0[j]);
906 vst1q_u64(lmt_addr + 2, cmd2[j]);
907 vst1q_u64(lmt_addr + 4, cmd1[j]);
908 vst1q_u64(lmt_addr + 6, cmd3[j]);
910 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
911 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
912 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
913 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
914 } else if (flags & NIX_TX_NEED_EXT_HDR) {
915 /* EXT headers take 3 each, with space for 2 segs. */
916 cn10k_nix_prepare_mseg_vec(mbufs[j],
920 vst1q_u64(lmt_addr, cmd0[j]);
921 vst1q_u64(lmt_addr + 2, cmd2[j]);
922 vst1q_u64(lmt_addr + 4, cmd1[j]);
925 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
929 segdw[j + 1], flags);
930 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
931 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
932 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
934 cn10k_nix_prepare_mseg_vec(mbufs[j],
938 vst1q_u64(lmt_addr, cmd0[j]);
939 vst1q_u64(lmt_addr + 2, cmd1[j]);
942 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
946 segdw[j + 1], flags);
947 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
948 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
950 *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
955 if ((flags & NIX_TX_NEED_EXT_HDR) &&
956 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
957 cn10k_nix_prepare_mseg_vec(mbufs[j],
961 vst1q_u64(lmt_addr, cmd0[j]);
962 vst1q_u64(lmt_addr + 2, cmd2[j]);
963 vst1q_u64(lmt_addr + 4, cmd1[j]);
966 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
967 } else if (flags & NIX_TX_NEED_EXT_HDR) {
968 cn10k_nix_prepare_mseg_vec(mbufs[j],
972 vst1q_u64(lmt_addr, cmd0[j]);
973 vst1q_u64(lmt_addr + 2, cmd2[j]);
974 vst1q_u64(lmt_addr + 4, cmd1[j]);
976 cn10k_nix_prepare_mseg_vec(mbufs[j],
980 vst1q_u64(lmt_addr, cmd0[j]);
981 vst1q_u64(lmt_addr + 2, cmd1[j]);
983 *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
994 static __rte_always_inline uint16_t
995 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
996 uint16_t pkts, uint64_t *cmd, uintptr_t base,
997 const uint16_t flags)
999 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1000 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1001 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1002 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1003 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1004 uint64x2_t senddesc01_w0, senddesc23_w0;
1005 uint64x2_t senddesc01_w1, senddesc23_w1;
1006 uint16_t left, scalar, burst, i, lmt_id;
1007 uint64x2_t sendext01_w0, sendext23_w0;
1008 uint64x2_t sendext01_w1, sendext23_w1;
1009 uint64x2_t sendmem01_w0, sendmem23_w0;
1010 uint64x2_t sendmem01_w1, sendmem23_w1;
1011 uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1012 uint64x2_t sgdesc01_w0, sgdesc23_w0;
1013 uint64x2_t sgdesc01_w1, sgdesc23_w1;
1014 struct cn10k_eth_txq *txq = tx_queue;
1015 uintptr_t laddr = txq->lmt_base;
1016 rte_iova_t io_addr = txq->io_addr;
1017 uint64x2_t ltypes01, ltypes23;
1018 uint64x2_t xtmp128, ytmp128;
1019 uint64x2_t xmask01, xmask23;
1020 uint8_t lnum, shift;
1022 __uint128_t data128;
1026 if (!(flags & NIX_TX_VWQE_F)) {
1027 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1028 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1029 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1030 /* Reduce the cached count */
1031 txq->fc_cache_pkts -= pkts;
1033 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1034 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1037 /* Perform header writes before barrier for TSO */
1038 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1039 for (i = 0; i < pkts; i++)
1040 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1043 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1044 senddesc23_w0 = senddesc01_w0;
1045 senddesc01_w1 = vdupq_n_u64(0);
1046 senddesc23_w1 = senddesc01_w1;
1047 sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1048 sgdesc23_w0 = sgdesc01_w0;
1050 /* Load command defaults into vector variables. */
1051 if (flags & NIX_TX_NEED_EXT_HDR) {
1052 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1053 sendext23_w0 = sendext01_w0;
1054 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1055 sendext23_w1 = sendext01_w1;
1056 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1057 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1058 sendmem23_w0 = sendmem01_w0;
1059 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1060 sendmem23_w1 = sendmem01_w1;
1064 /* Get LMT base address and LMT ID as lcore id */
1065 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1068 /* Number of packets to prepare depends on offloads enabled. */
1069 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1070 cn10k_nix_pkts_per_vec_brst(flags) :
1072 if (flags & NIX_TX_MULTI_SEG_F) {
1078 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1079 if (flags & NIX_TX_MULTI_SEG_F) {
1082 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1083 struct rte_mbuf *m = tx_pkts[j];
1085 /* Get dwords based on nb_segs. */
1086 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1087 /* Add dwords based on offloads. */
1088 segdw[j] += 1 + /* SEND HDR */
1089 !!(flags & NIX_TX_NEED_EXT_HDR) +
1090 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1093 /* Check if there are enough LMTLINES for this loop */
1094 if (lnum + 4 > 32) {
1095 uint8_t ldwords_con = 0, lneeded = 0;
1096 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1097 ldwords_con += segdw[j];
1098 if (ldwords_con > 8) {
1100 ldwords_con = segdw[j];
1104 if (lnum + lneeded > 32) {
1110 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1112 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1113 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1115 senddesc23_w0 = senddesc01_w0;
1116 sgdesc23_w0 = sgdesc01_w0;
1118 /* Clear vlan enables. */
1119 if (flags & NIX_TX_NEED_EXT_HDR) {
1120 sendext01_w1 = vbicq_u64(sendext01_w1,
1121 vdupq_n_u64(0x3FFFF00FFFF00));
1122 sendext23_w1 = sendext01_w1;
1125 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1126 /* Reset send mem alg to SETTSTMP from SUB */
1127 sendmem01_w0 = vbicq_u64(sendmem01_w0,
1128 vdupq_n_u64(BIT_ULL(59)));
1129 /* Reset send mem address to default. */
1131 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1132 sendmem23_w0 = sendmem01_w0;
1133 sendmem23_w1 = sendmem01_w1;
1136 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1137 /* Clear the LSO enable bit. */
1138 sendext01_w0 = vbicq_u64(sendext01_w0,
1139 vdupq_n_u64(BIT_ULL(14)));
1140 sendext23_w0 = sendext01_w0;
1143 /* Move mbufs to iova */
1144 mbuf0 = (uint64_t *)tx_pkts[0];
1145 mbuf1 = (uint64_t *)tx_pkts[1];
1146 mbuf2 = (uint64_t *)tx_pkts[2];
1147 mbuf3 = (uint64_t *)tx_pkts[3];
1149 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1150 offsetof(struct rte_mbuf, buf_iova));
1151 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1152 offsetof(struct rte_mbuf, buf_iova));
1153 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1154 offsetof(struct rte_mbuf, buf_iova));
1155 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1156 offsetof(struct rte_mbuf, buf_iova));
1158 * Get each mbuf's ol_flags, iova, pktlen, dataoff:
1159 * dataoff_iovaX.D[0] = iova,
1160 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1161 * len_olflagsX.D[0] = ol_flags,
1162 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1164 dataoff_iova0 = vld1q_u64(mbuf0);
1165 len_olflags0 = vld1q_u64(mbuf0 + 2);
1166 dataoff_iova1 = vld1q_u64(mbuf1);
1167 len_olflags1 = vld1q_u64(mbuf1 + 2);
1168 dataoff_iova2 = vld1q_u64(mbuf2);
1169 len_olflags2 = vld1q_u64(mbuf2 + 2);
1170 dataoff_iova3 = vld1q_u64(mbuf3);
1171 len_olflags3 = vld1q_u64(mbuf3 + 2);
1173 /* Move mbuf pointers to point to the pool field */
1174 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1175 offsetof(struct rte_mbuf, pool) -
1176 offsetof(struct rte_mbuf, buf_iova));
1177 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1178 offsetof(struct rte_mbuf, pool) -
1179 offsetof(struct rte_mbuf, buf_iova));
1180 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1181 offsetof(struct rte_mbuf, pool) -
1182 offsetof(struct rte_mbuf, buf_iova));
1183 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1184 offsetof(struct rte_mbuf, pool) -
1185 offsetof(struct rte_mbuf, buf_iova));
1187 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1188 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1189 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1191 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1192 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1195 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1196 : [a] "+w"(senddesc01_w1)
1197 : [in] "r"(mbuf0 + 2)
1200 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1201 : [a] "+w"(senddesc01_w1)
1202 : [in] "r"(mbuf1 + 2)
1205 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1206 : [b] "+w"(senddesc23_w1)
1207 : [in] "r"(mbuf2 + 2)
1210 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1211 : [b] "+w"(senddesc23_w1)
1212 : [in] "r"(mbuf3 + 2)
1215 /* Get pool pointer alone */
1216 mbuf0 = (uint64_t *)*mbuf0;
1217 mbuf1 = (uint64_t *)*mbuf1;
1218 mbuf2 = (uint64_t *)*mbuf2;
1219 mbuf3 = (uint64_t *)*mbuf3;
1221 /* Get pool pointer alone */
1222 mbuf0 = (uint64_t *)*mbuf0;
1223 mbuf1 = (uint64_t *)*mbuf1;
1224 mbuf2 = (uint64_t *)*mbuf2;
1225 mbuf3 = (uint64_t *)*mbuf3;
1228 const uint8x16_t shuf_mask2 = {
1229 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1230 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1232 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1233 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1235 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1236 const uint64x2_t and_mask0 = {
1241 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1242 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1243 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1244 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1247 * Pick only 16 bits of pktlen present at bits 63:32
1248 * and place them at bits 15:0.
1250 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1251 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1253 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1254 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1255 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1257 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1258 * pktlen at 15:0 position.
1260 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1261 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1262 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1263 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1265 /* Move mbuf to point to pool_id. */
1266 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1267 offsetof(struct rte_mempool, pool_id));
1268 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1269 offsetof(struct rte_mempool, pool_id));
1270 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1271 offsetof(struct rte_mempool, pool_id));
1272 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1273 offsetof(struct rte_mempool, pool_id));
1275 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1276 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1278 * Lookup table to translate ol_flags to
1279 * il3/il4 types. But we still use ol3/ol4 types in
1280 * senddesc_w1 as only one header processing is enabled.
1282 const uint8x16_t tbl = {
1283 /* [0-15] = il4type:il3type */
1284 0x04, /* none (IPv6 assumed) */
1285 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
1286 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
1287 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
1288 0x03, /* PKT_TX_IP_CKSUM */
1289 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
1290 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
1291 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
1292 0x02, /* PKT_TX_IPV4 */
1293 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
1294 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
1295 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
1296 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
1297 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1300 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1303 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1308 /* Extract olflags to translate to iltypes */
1309 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1310 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1313 * E(47):L3_LEN(9):L2_LEN(7+z)
1314 * E(47):L3_LEN(9):L2_LEN(7+z)
1316 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1317 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1319 /* Move OLFLAGS bits 55:52 to 51:48
1320 * with zeros prepended on the byte and the rest
1321 * of the bits cleared.
1322 */
1323 xtmp128 = vshrq_n_u8(xtmp128, 4);
1324 ytmp128 = vshrq_n_u8(ytmp128, 4);
1326 * E(48):L3_LEN(8):L2_LEN(z+7)
1327 * E(48):L3_LEN(8):L2_LEN(z+7)
1329 const int8x16_t tshft3 = {
1330 -1, 0, 8, 8, 8, 8, 8, 8,
1331 -1, 0, 8, 8, 8, 8, 8, 8,
1334 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1335 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1338 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1339 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1341 /* Pick only the relevant fields, i.e. bits 48:55 of iltype,
1342 * and place them in ol3/ol4type of senddesc_w1.
1343 */
1344 const uint8x16_t shuf_mask0 = {
1345 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1346 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1349 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1350 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1352 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1353 * a [E(32):E(16):OL3(8):OL2(8)]
1355 * a [E(32):E(16):(OL3+OL2):OL2]
1356 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1358 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1359 vshlq_n_u16(senddesc01_w1, 8));
1360 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1361 vshlq_n_u16(senddesc23_w1, 8));
1363 /* Move ltypes to senddesc*_w1 */
1364 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1365 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1366 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1367 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1369 * Lookup table to translate ol_flags to
1373 const uint8x16_t tbl = {
1374 /* [0-15] = ol4type:ol3type */
1376 0x03, /* OUTER_IP_CKSUM */
1377 0x02, /* OUTER_IPV4 */
1378 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1379 0x04, /* OUTER_IPV6 */
1380 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1381 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1382 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1385 0x00, /* OUTER_UDP_CKSUM */
1386 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1387 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1388 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1391 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1392 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1395 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1398 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1399 * OUTER_IPV4 | OUTER_IP_CKSUM
1403 /* Extract olflags to translate to iltypes */
1404 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1405 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1408 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1409 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1411 const uint8x16_t shuf_mask5 = {
1412 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1413 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1415 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1416 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1418 /* Extract outer ol flags only */
1419 const uint64x2_t o_cksum_mask = {
1424 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1425 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1427 /* Extract OUTER_UDP_CKSUM bit 41 and
1431 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1432 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1434 /* Shift oltype by 2 to start nibble from BIT(56)
1435 * instead of BIT(58)
1437 xtmp128 = vshrq_n_u8(xtmp128, 2);
1438 ytmp128 = vshrq_n_u8(ytmp128, 2);
1440 * E(48):L3_LEN(8):L2_LEN(z+7)
1441 * E(48):L3_LEN(8):L2_LEN(z+7)
1443 const int8x16_t tshft3 = {
1444 -1, 0, 8, 8, 8, 8, 8, 8,
1445 -1, 0, 8, 8, 8, 8, 8, 8,
1448 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1449 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1452 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1453 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1455 /* Pick only the relevant fields, i.e. bits 56:63 of oltype,
1456 * and place them in ol3/ol4type of senddesc_w1.
1457 */
1458 const uint8x16_t shuf_mask0 = {
1459 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1460 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1463 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1464 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1466 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1467 * a [E(32):E(16):OL3(8):OL2(8)]
1469 * a [E(32):E(16):(OL3+OL2):OL2]
1470 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1472 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1473 vshlq_n_u16(senddesc01_w1, 8));
1474 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1475 vshlq_n_u16(senddesc23_w1, 8));
1477 /* Move ltypes to senddesc*_w1 */
1478 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1479 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1480 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1481 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1482 /* Lookup table to translate ol_flags to
1483 * ol4type, ol3type, il4type, il3type of senddesc_w1
1485 const uint8x16x2_t tbl = {{
1487 /* [0-15] = il4type:il3type */
1488 0x04, /* none (IPv6) */
1489 0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1490 0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1491 0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1492 0x03, /* PKT_TX_IP_CKSUM */
1493 0x13, /* PKT_TX_IP_CKSUM |
1496 0x23, /* PKT_TX_IP_CKSUM |
1499 0x33, /* PKT_TX_IP_CKSUM |
1502 0x02, /* PKT_TX_IPV4 */
1503 0x12, /* PKT_TX_IPV4 |
1506 0x22, /* PKT_TX_IPV4 |
1509 0x32, /* PKT_TX_IPV4 |
1512 0x03, /* PKT_TX_IPV4 |
1515 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1518 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1521 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1527 /* [16-31] = ol4type:ol3type */
1529 0x03, /* OUTER_IP_CKSUM */
1530 0x02, /* OUTER_IPV4 */
1531 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1532 0x04, /* OUTER_IPV6 */
1533 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1534 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1535 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1538 0x00, /* OUTER_UDP_CKSUM */
1539 0x33, /* OUTER_UDP_CKSUM |
1542 0x32, /* OUTER_UDP_CKSUM |
1545 0x33, /* OUTER_UDP_CKSUM |
1546 * OUTER_IPV4 | OUTER_IP_CKSUM
1548 0x34, /* OUTER_UDP_CKSUM |
1551 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1554 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1557 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1558 * OUTER_IPV4 | OUTER_IP_CKSUM
1563 /* Extract olflags to translate to oltype & iltype */
1564 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1565 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1568 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1569 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1571 const uint32x4_t tshft_4 = {
1577 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1578 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1581 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1582 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1584 const uint8x16_t shuf_mask5 = {
1585 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1586 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1588 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1589 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1591 /* Extract outer and inner header ol_flags */
1592 const uint64x2_t oi_cksum_mask = {
1597 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1598 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1600 /* Extract OUTER_UDP_CKSUM bit 41 and
1604 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1605 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1607 /* Shift right oltype by 2 and iltype by 4
1608 * to start oltype nibble from BIT(58)
1609 * instead of BIT(56) and iltype nibble from BIT(48)
1610 * instead of BIT(52).
1612 const int8x16_t tshft5 = {
1613 8, 8, 8, 8, 8, 8, -4, -2,
1614 8, 8, 8, 8, 8, 8, -4, -2,
1617 xtmp128 = vshlq_u8(xtmp128, tshft5);
1618 ytmp128 = vshlq_u8(ytmp128, tshft5);
1620 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1621 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1623 const int8x16_t tshft3 = {
1624 -1, 0, -1, 0, 0, 0, 0, 0,
1625 -1, 0, -1, 0, 0, 0, 0, 0,
1628 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1629 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1631 /* Mark Bit(4) of oltype */
1632 const uint64x2_t oi_cksum_mask2 = {
1637 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1638 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1641 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1642 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1644 /* Pick only the relevant fields, i.e. bits 48:55 of iltype and
1645 * bits 56:63 of oltype, and place them in the corresponding
1646 * places in senddesc_w1.
1647 */
1648 const uint8x16_t shuf_mask0 = {
1649 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1650 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1653 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1654 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1656 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1657 * l3len, l2len, ol3len, ol2len.
1658 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1660 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1662 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1663 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1665 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1666 vshlq_n_u32(senddesc01_w1, 8));
1667 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1668 vshlq_n_u32(senddesc23_w1, 8));
1670 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1671 senddesc01_w1 = vaddq_u8(
1672 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1673 senddesc23_w1 = vaddq_u8(
1674 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
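/* Two add-with-shifted-self passes form a byte-wise running prefix sum:
 * after the 8-bit pass each byte holds itself plus its lower neighbour,
 * and after the 16-bit pass every byte holds the sum of all lower bytes,
 * which is exactly the chain of OL3/OL4/IL3/IL4 pointer offsets.
 */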
1676 /* Move ltypes to senddesc*_w1 */
1677 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1678 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1681 xmask01 = vdupq_n_u64(0);
1683 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1688 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1693 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1698 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1702 xmask01 = vshlq_n_u64(xmask01, 20);
1703 xmask23 = vshlq_n_u64(xmask23, 20);
1705 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1706 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1708 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1709 /* Tx ol_flag for vlan. */
1710 const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
1711 /* Bit enable for VLAN1 */
1712 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1713 /* Tx ol_flag for QnQ. */
1714 const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
1715 /* Bit enable for VLAN0 */
1716 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1717 /* Load vlan values from packet. outer is VLAN 0 */
1718 uint64x2_t ext01 = {
1719 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1720 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1721 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1722 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1724 uint64x2_t ext23 = {
1725 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1726 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1727 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1728 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1731 /* Get ol_flags of the packets. */
1732 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1733 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1735 /* ORR vlan outer/inner values into cmd. */
1736 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1737 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1739 /* Test for offload enable bits and generate masks. */
1740 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1742 vandq_u64(vtstq_u64(xtmp128, olq),
1744 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1746 vandq_u64(vtstq_u64(ytmp128, olq),
1749 /* Set vlan enable bits into cmd based on mask. */
1750 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1751 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1754 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1755 /* Tx ol_flag for timestamp. */
1756 const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
1757 PKT_TX_IEEE1588_TMST};
1758 /* Set send mem alg to SUB. */
1759 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1760 /* Increment send mem address by 8. */
1761 const uint64x2_t addr = {0x8, 0x8};
1763 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1764 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1766 /* Check if timestamp is requested and generate an inverted
1767 * mask, as we need not make any changes to the default cmd
1768 * value.
1769 */
1770 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1771 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1773 /* Change send mem address to an 8 byte offset when
1774 * TSTMP is disabled.
1776 sendmem01_w1 = vaddq_u64(sendmem01_w1,
1777 vandq_u64(xtmp128, addr));
1778 sendmem23_w1 = vaddq_u64(sendmem23_w1,
1779 vandq_u64(ytmp128, addr));
1780 /* Change send mem alg to SUB when TSTMP is disabled. */
1781 sendmem01_w0 = vorrq_u64(sendmem01_w0,
1782 vandq_u64(xtmp128, alg));
1783 sendmem23_w0 = vorrq_u64(sendmem23_w0,
1784 vandq_u64(ytmp128, alg));
1786 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1787 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1788 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1789 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1792 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1793 const uint64_t lso_fmt = txq->lso_tun_fmt;
1794 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1795 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1797 /* Extract SD W1 as we need to set L4 types. */
1798 vst1q_u64(sd_w1, senddesc01_w1);
1799 vst1q_u64(sd_w1 + 2, senddesc23_w1);
1801 /* Extract SX W0 as we need to set LSO fields. */
1802 vst1q_u64(sx_w0, sendext01_w0);
1803 vst1q_u64(sx_w0 + 2, sendext23_w0);
1805 /* Extract ol_flags. */
1806 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1807 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1809 /* Prepare individual mbufs. */
1810 cn10k_nix_prepare_tso(tx_pkts[0],
1811 (union nix_send_hdr_w1_u *)&sd_w1[0],
1812 (union nix_send_ext_w0_u *)&sx_w0[0],
1813 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
1815 cn10k_nix_prepare_tso(tx_pkts[1],
1816 (union nix_send_hdr_w1_u *)&sd_w1[1],
1817 (union nix_send_ext_w0_u *)&sx_w0[1],
1818 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
1820 cn10k_nix_prepare_tso(tx_pkts[2],
1821 (union nix_send_hdr_w1_u *)&sd_w1[2],
1822 (union nix_send_ext_w0_u *)&sx_w0[2],
1823 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
1825 cn10k_nix_prepare_tso(tx_pkts[3],
1826 (union nix_send_hdr_w1_u *)&sd_w1[3],
1827 (union nix_send_ext_w0_u *)&sx_w0[3],
1828 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
1830 senddesc01_w1 = vld1q_u64(sd_w1);
1831 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1833 sendext01_w0 = vld1q_u64(sx_w0);
1834 sendext23_w0 = vld1q_u64(sx_w0 + 2);
1837 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1838 !(flags & NIX_TX_MULTI_SEG_F)) {
1839 /* Set don't free bit if reference count > 1 */
1840 xmask01 = vdupq_n_u64(0);
1843 /* Move mbufs to iova */
1844 mbuf0 = (uint64_t *)tx_pkts[0];
1845 mbuf1 = (uint64_t *)tx_pkts[1];
1846 mbuf2 = (uint64_t *)tx_pkts[2];
1847 mbuf3 = (uint64_t *)tx_pkts[3];
1849 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1850 vsetq_lane_u64(0x80000, xmask01, 0);
1852 __mempool_check_cookies(
1853 ((struct rte_mbuf *)mbuf0)->pool,
1854 (void **)&mbuf0, 1, 0);
1856 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1857 vsetq_lane_u64(0x80000, xmask01, 1);
1859 __mempool_check_cookies(
1860 ((struct rte_mbuf *)mbuf1)->pool,
1861 (void **)&mbuf1, 1, 0);
1863 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1864 vsetq_lane_u64(0x80000, xmask23, 0);
1866 __mempool_check_cookies(
1867 ((struct rte_mbuf *)mbuf2)->pool,
1868 (void **)&mbuf2, 1, 0);
1870 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1871 vsetq_lane_u64(0x80000, xmask23, 1);
1873 __mempool_check_cookies(
1874 ((struct rte_mbuf *)mbuf3)->pool,
1875 (void **)&mbuf3, 1, 0);
1876 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1877 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1878 } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
1879 /* Move mbufs to iova */
1880 mbuf0 = (uint64_t *)tx_pkts[0];
1881 mbuf1 = (uint64_t *)tx_pkts[1];
1882 mbuf2 = (uint64_t *)tx_pkts[2];
1883 mbuf3 = (uint64_t *)tx_pkts[3];
1885 /* Mark mempool object as "put" since
1886 * it is freed by NIX.
1887 */
1888 __mempool_check_cookies(
1889 ((struct rte_mbuf *)mbuf0)->pool,
1890 (void **)&mbuf0, 1, 0);
1892 __mempool_check_cookies(
1893 ((struct rte_mbuf *)mbuf1)->pool,
1894 (void **)&mbuf1, 1, 0);
1896 __mempool_check_cookies(
1897 ((struct rte_mbuf *)mbuf2)->pool,
1898 (void **)&mbuf2, 1, 0);
1900 __mempool_check_cookies(
1901 ((struct rte_mbuf *)mbuf3)->pool,
1902 (void **)&mbuf3, 1, 0);
1905 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1906 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1907 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1908 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1909 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1911 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1912 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1913 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1914 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1916 if (flags & NIX_TX_NEED_EXT_HDR) {
1917 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1918 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1919 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1920 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1923 if (flags & NIX_TX_MULTI_SEG_F) {
1927 j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
1930 LMT_OFF(laddr, lnum,
1932 &wd.data128, &shift,
1935 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1936 /* Store the prepared send desc to LMT lines */
1937 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1938 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
1939 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
1940 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
1941 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
1942 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
1943 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
1944 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
1945 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
1947 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
1948 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
1949 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
1950 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
1951 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
1952 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
1953 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
1954 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
1956 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
1957 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
1958 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
1959 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
1960 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
1961 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
1963 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
1964 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
1965 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
1966 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
1967 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
1968 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
1972 /* Store the prepared send desc to LMT lines */
1973 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
1974 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
1975 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
1976 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
1977 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
1978 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
1979 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
1980 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
1984 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1987 if (flags & NIX_TX_MULTI_SEG_F)
1990 if (flags & NIX_TX_VWQE_F)
1991 roc_sso_hws_head_wait(base);
1995 if (!(flags & NIX_TX_MULTI_SEG_F))
1996 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
1998 pa = io_addr | (wd.data[0] & 0x7) << 4;
1999 wd.data[0] &= ~0x7ULL;
2001 if (flags & NIX_TX_MULTI_SEG_F)
2004 wd.data[0] |= (15ULL << 12);
2005 wd.data[0] |= (uint64_t)lmt_id;
2008 roc_lmt_submit_steorl(wd.data[0], pa);
2010 if (!(flags & NIX_TX_MULTI_SEG_F))
2011 wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2013 pa = io_addr | (wd.data[1] & 0x7) << 4;
2014 wd.data[1] &= ~0x7ULL;
2016 if (flags & NIX_TX_MULTI_SEG_F)
2019 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2020 wd.data[1] |= (uint64_t)(lmt_id + 16);
2023 roc_lmt_submit_steorl(wd.data[1], pa);
2025 if (!(flags & NIX_TX_MULTI_SEG_F))
2026 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2028 pa = io_addr | (wd.data[0] & 0x7) << 4;
2029 wd.data[0] &= ~0x7ULL;
2031 if (flags & NIX_TX_MULTI_SEG_F)
2034 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2035 wd.data[0] |= lmt_id;
2038 roc_lmt_submit_steorl(wd.data[0], pa);
2046 if (unlikely(scalar)) {
2047 if (flags & NIX_TX_MULTI_SEG_F)
2048 pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
2052 pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
2060 static __rte_always_inline uint16_t
2061 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
2062 uint16_t pkts, uint64_t *cmd, uintptr_t base,
2063 const uint16_t flags)
2065 RTE_SET_USED(tx_queue);
2066 RTE_SET_USED(tx_pkts);
2069 RTE_SET_USED(flags);
2075 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
2076 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2077 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
2078 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
2079 #define TSO_F NIX_TX_OFFLOAD_TSO_F
2080 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
2082 /* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
2083 #define NIX_TX_FASTPATH_MODES \
2084 T(no_offload, 0, 0, 0, 0, 0, 0, 4, \
2085 NIX_TX_OFFLOAD_NONE) \
2086 T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, \
2088 T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, \
2090 T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, \
2091 OL3OL4CSUM_F | L3L4CSUM_F) \
2092 T(vlan, 0, 0, 0, 1, 0, 0, 6, \
2094 T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, \
2095 VLAN_F | L3L4CSUM_F) \
2096 T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, \
2097 VLAN_F | OL3OL4CSUM_F) \
2098 T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6, \
2099 VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2100 T(noff, 0, 0, 1, 0, 0, 0, 4, \
2102 T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, \
2103 NOFF_F | L3L4CSUM_F) \
2104 T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, \
2105 NOFF_F | OL3OL4CSUM_F) \
2106 T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4, \
2107 NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2108 T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, \
2110 T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, \
2111 NOFF_F | VLAN_F | L3L4CSUM_F) \
2112 T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, \
2113 NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2114 T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6, \
2115 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2116 T(tso, 0, 1, 0, 0, 0, 0, 6, \
2118 T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, \
2119 TSO_F | L3L4CSUM_F) \
2120 T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, \
2121 TSO_F | OL3OL4CSUM_F) \
2122 T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6, \
2123 TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2124 T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, \
2126 T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, \
2127 TSO_F | VLAN_F | L3L4CSUM_F) \
2128 T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, \
2129 TSO_F | VLAN_F | OL3OL4CSUM_F) \
2130 T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6, \
2131 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2132 T(tso_noff, 0, 1, 1, 0, 0, 0, 6, \
2134 T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, \
2135 TSO_F | NOFF_F | L3L4CSUM_F) \
2136 T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, \
2137 TSO_F | NOFF_F | OL3OL4CSUM_F) \
2138 T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6, \
2139 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2140 T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, \
2141 TSO_F | NOFF_F | VLAN_F) \
2142 T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6, \
2143 TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2144 T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6, \
2145 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2146 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6, \
2147 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2148 T(ts, 1, 0, 0, 0, 0, 0, 8, \
2150 T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, \
2151 TSP_F | L3L4CSUM_F) \
2152 T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, \
2153 TSP_F | OL3OL4CSUM_F) \
2154 T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8, \
2155 TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2156 T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, \
2158 T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, \
2159 TSP_F | VLAN_F | L3L4CSUM_F) \
2160 T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, \
2161 TSP_F | VLAN_F | OL3OL4CSUM_F) \
2162 T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8, \
2163 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2164 T(ts_noff, 1, 0, 1, 0, 0, 0, 8, \
2166 T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, \
2167 TSP_F | NOFF_F | L3L4CSUM_F) \
2168 T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, \
2169 TSP_F | NOFF_F | OL3OL4CSUM_F) \
2170 T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8, \
2171 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2172 T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, \
2173 TSP_F | NOFF_F | VLAN_F) \
2174 T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8, \
2175 TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2176 T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8, \
2177 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2178 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8, \
2179 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2180 T(ts_tso, 1, 1, 0, 0, 0, 0, 8, \
2182 T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, \
2183 TSP_F | TSO_F | L3L4CSUM_F) \
2184 T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, \
2185 TSP_F | TSO_F | OL3OL4CSUM_F) \
2186 T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8, \
2187 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2188 T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, \
2189 TSP_F | TSO_F | VLAN_F) \
2190 T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8, \
2191 TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
2192 T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8, \
2193 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2194 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8, \
2195 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2196 T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, \
2197 TSP_F | TSO_F | NOFF_F) \
2198 T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8, \
2199 TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
2200 T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8, \
2201 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2202 T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8, \
2203 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2204 T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, \
2205 TSP_F | TSO_F | NOFF_F | VLAN_F) \
2206 T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8, \
2207 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2208 T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8, \
2209 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2210 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
2211 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2213 #define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
2214 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \
2215 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2217 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name( \
2218 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2220 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
2221 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2223 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
2224 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
2226 NIX_TX_FASTPATH_MODES
2229 #endif /* __CN10K_TX_H__ */