/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
/* Flags to control the xmit_prepare function.
 * They are defined from the backward (MSB) end to denote that they are
 * not used as offload flags when picking the Tx function.
 */
#define NIX_TX_VWQE_F	   BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update the fc_cache_pkts */            \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqes_per_sqb to express in pkts */    \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
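
/* Worked example (illustrative numbers only): with nb_sqb_bufs_adj = 512,
 * *fc_mem = 100 SQBs consumed and sqes_per_sqb_log2 = 3, the refreshed
 * credit is (512 - 100) << 3 = 3296 packets' worth of SQEs.
 */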

/* Macro converting an encoded number of segments to a number of dwords;
 * each nb_segs value is encoded as 4 bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
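
/* Purely illustrative sanity checks (assuming a C11 toolchain): each nibble
 * of NIX_SEGDW_MAGIC is the number of 16B LMT units the SG subdescriptors
 * occupy for that nb_segs value. One SEND_SG header covers up to three
 * pointers, so e.g. four segments need two headers plus four pointers,
 * i.e. six dwords rounded up to three 16B units.
 */
_Static_assert(NIX_NB_SEGS_TO_SEGDW(1) == 1, "1 seg -> 1 unit");
_Static_assert(NIX_NB_SEGS_TO_SEGDW(3) == 2, "3 segs -> 2 units");
_Static_assert(NIX_NB_SEGS_TO_SEGDW(4) == 3, "4 segs -> 3 units");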

#define LMT_OFF(lmt_addr, lmt_num, offset)                                     \
	(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
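
/* For example, assuming 128B LMT lines (ROC_LMT_LINE_SIZE_LOG2 == 7),
 * LMT_OFF(base, 2, 16) resolves to base + 2 * 128 + 16, i.e. 16 bytes
 * into the third LMT line.
 */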

/* Function to determine the number of Tx subdescriptors required when the
 * ext subdescriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		       ? 2
		       : ((flags &
			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
				  ? 1
				  : 0);
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}
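
/* With the usual 32 LMT lines per core (ROC_LMT_LINES_PER_CORE_LOG2 == 5)
 * this evaluates to 64 packets per burst when the ext header is needed
 * (two packets per LMT line) and 128 packets otherwise (four per line).
 */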

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ?
			     ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
			     4;
}
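
/* The counts mirror the per-packet subdescriptor layout: 4 dwords =
 * SEND_HDR(2) + SEND_SG(1) + iova(1); 6 adds SEND_EXT(2); 8 further adds
 * SEND_MEM(2) for the Tx timestamp.
 */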

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;
	uint8_t i;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	for (i = 1; i <= 15; i++)
		data |= dw_m1 << (16 + i * 3);

	return data;
}
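
/* Sketch of the LMTST data word as consumed below by
 * roc_lmt_submit_steorl() (field placement as used in this file, not a
 * full hardware description): bits 2:0 carry the size-1 of line 0 and are
 * folded into the I/O address, bits 11:0 are then reused for the LMT ID,
 * a count-1 field for the number of LMT lines sits at bits 15:12, and
 * bits 63:19 hold the size-1 values of lines 1..15, three bits each.
 */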

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ?
			      (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
			      4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;
	uint8_t i;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	for (i = 1; i <= 15; i++)
		data |= dw_m1 << (16 + i * 3);

	return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags)
{
	/* Send hdr */
	cmd[0] = txq->send_hdr_w0;
	cmd[1] = 0;
	cmd += 2;

	/* Send ext if present */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
		cmd += 2;
	}

	/* Send sg */
	cmd[0] = txq->sg_w0;
	cmd[1] = 0;
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & PKT_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       PKT_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata +
						       m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & PKT_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
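
/* Worked example (plain TCP/IPv4, no tunnel): with l2_len = 14, l3_len = 20,
 * l4_len = 20 and pkt_len = 1514, lso_sb = 54 and paylen = 1460, so the IP
 * total-length field at mdata + 14 + 2 is rewritten from 1500 to 40; the
 * LSO engine then restores per-segment lengths while segmenting.
 */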

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
		       const uint16_t flags, const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		send_hdr->w0.total = m->data_len;
		send_hdr->w0.aura =
			roc_npa_aura_handle_to_aura(m->pool->pool_id);
	}

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

		/* In case of no tunnel header, shift the IL3/IL4 fields a
		 * bit so that OL3/OL4 are used for the header checksum.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only one header
		 * is present.
		 */

		/* Outer L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uint16_t lso_sb;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 * mbuf is greater than 1,
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
	}

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be in stack. Intent is
	 * 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
		struct nix_send_ext_s *send_hdr_ext =
					(struct nix_send_ext_s *)lmt_addr + 16;
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
		send_hdr_ext->w0.tstmp = 1;
		if (flags & NIX_TX_MULTI_SEG_F) {
			/* Retrieving the default desc values */
			lmt[off] = cmd[2];

			/* Using a compiler barrier to avoid violation of C
			 * aliasing rules.
			 */
			rte_compiler_barrier();
		}

		/* For packets for which PKT_TX_IEEE1588_TMST is not set, the
		 * Tx tstamp should not be recorded. Hence the alg type is
		 * changed to NIX_SENDMEMALG_SET and the send mem addr field
		 * is moved to the next 8 bytes so that the registered Tx
		 * tstamp address is not corrupted.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
	}
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[3 + off];

	i = 0;
	nb_segs = m->nb_segs;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
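
/* Illustrative LMT layout produced above for nb_segs = 4 with no ext
 * header (one dword per cell):
 *   cmd[0..1] SEND_HDR | cmd[2] SEND_SG (3 segs) | cmd[3..5] iova0..2 |
 *   cmd[6] SEND_SG (1 seg) | cmd[7] iova3
 * i.e. eight dwords, reported as segdw = 4 sixteen-byte units.
 */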

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		    uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uintptr_t pa, lmt_addr = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uint64_t lso_tun_fmt;
	uint64_t data;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}

	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags);

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, 4, flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
	}

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}
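
/* Example split (illustrative): for burst = 20, STEOR0 flushes LMT lines
 * 0..15 (count-1 = 15, the per-STEOR maximum) and STEOR1 flushes lines
 * 16..19 with count-1 = burst - 17 = 3 against lmt_id + 16.
 */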

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t pkts, uint64_t *cmd, uintptr_t base,
			 const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint64_t data0, data1;
	uint64_t lso_tun_fmt;
	__uint128_t data128;
	uint16_t shft;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, segdw,
					      flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		data128 |= (((__uint128_t)(segdw - 1)) << shft);
		shft += 3;
	}

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;

	/* Trigger LMTST */
	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		      const uint64_t flags, const uint64_t lso_tun_fmt)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & PKT_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & PKT_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
			0x1;
		uint8_t shift = is_udp_tun ? 32 : 0;

		shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
		shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format = (lso_tun_fmt >> shift);
	}
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
				union nix_send_hdr_w0_u *sh,
				union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif

	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			   uint64x2_t *cmd1, const uint8_t segdw,
			   const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;

	if (m->nb_segs == 1) {
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
		return;
	}

	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	sh.sizem1 = segdw - 1;
	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}

#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
			       uint64x2_t *cmd1, uint64x2_t *cmd2,
			       uint64x2_t *cmd3, uint8_t *segdw,
			       uint64_t *lmt_addr, __uint128_t *data128,
			       uint8_t *shift, const uint16_t flags)
{
	uint8_t j, off, lmt_used;

	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
		/* No segments in 4 consecutive packets. */
		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);

			vst1q_u64(lmt_addr, cmd0[0]);
			vst1q_u64(lmt_addr + 2, cmd1[0]);
			vst1q_u64(lmt_addr + 4, cmd0[1]);
			vst1q_u64(lmt_addr + 6, cmd1[1]);
			vst1q_u64(lmt_addr + 8, cmd0[2]);
			vst1q_u64(lmt_addr + 10, cmd1[2]);
			vst1q_u64(lmt_addr + 12, cmd0[3]);
			vst1q_u64(lmt_addr + 14, cmd1[3]);

			*data128 |= ((__uint128_t)7) << *shift;
			*shift += 3;

			return 1;
		}
	}

	lmt_used = 0;
	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
		/* Fit consecutive packets in same LMTLINE. */
		if ((segdw[j] + segdw[j + 1]) <= 8) {
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				/* TSTAMP takes 4 each, no segs. */
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				vst1q_u64(lmt_addr + 6, cmd3[j]);

				vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
				vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				/* EXT header takes 3 each, space for 2 segs. */
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				off = segdw[j] - 3;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 12 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
				off = segdw[j] - 2;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 8 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
			}
			*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
				    << *shift;
			*shift += 3;
			j += 2;
		} else {
			if ((flags & NIX_TX_NEED_EXT_HDR) &&
			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				off = segdw[j] - 4;
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
			}
			*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
			*shift += 3;
			j++;
		}
		lmt_used++;
		lmt_addr += 16;
	}

	return lmt_used;
}

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint16_t left, scalar, burst, i, lmt_id;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t laddr = txq->lmt_base;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint8_t lnum, shift;
	union wdata {
		__uint128_t data128;
		uint64_t data[2];
	} wd;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	} else {
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
	}

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		for (i = 0; i < pkts; i++)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	senddesc23_w0 = senddesc01_w0;
	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
	sgdesc23_w0 = sgdesc01_w0;

	/* Load command defaults into vector variables. */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
		sendext23_w0 = sendext01_w0;
		sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		sendext23_w1 = sendext01_w1;
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
			sendmem23_w1 = sendmem01_w1;
		}
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
	left = pkts;
again:
	/* Number of packets to prepare depends on offloads enabled. */
	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
			      cn10k_nix_pkts_per_vec_brst(flags) :
			      left;
	if (flags & NIX_TX_MULTI_SEG_F) {
		wd.data128 = 0;
		shift = 16;
	}
	lnum = 0;

	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
		if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
				struct rte_mbuf *m = tx_pkts[j];

				/* Get dwords based on nb_segs. */
				segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
				/* Add dwords based on offloads. */
				segdw[j] += 1 + /* SEND HDR */
					    !!(flags & NIX_TX_NEED_EXT_HDR) +
					    !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
			}

			/* Check if there are enough LMTLINES for this loop */
			if (lnum + 4 > 32) {
				uint8_t ldwords_con = 0, lneeded = 0;
				for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
					ldwords_con += segdw[j];
					if (ldwords_con > 8) {
						lneeded++;
						ldwords_con = segdw[j];
					}
				}
				lneeded++;
				if (lnum + lneeded > 32) {
					burst = i;
					break;
				}
			}
		}
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB */
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			/* Clear the LSO enable bit. */
			sendext01_w0 = vbicq_u64(sendext01_w0,
						 vdupq_n_u64(BIT_ULL(14)));
			sendext23_w0 = sendext01_w0;
		}

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbuf's, olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbufs to point to pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));

		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
				0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
				0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
				0x02, /* PKT_TX_IPV4 */
				0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
				0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
				0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
				0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_TCP_CKSUM
				       */
				0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_SCTP_CKSUM
				       */
				0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and the rest
			 * of the bits zeroed.
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
					0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
					0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
					0x03, /* PKT_TX_IP_CKSUM */
					0x13, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
					0x02, /* PKT_TX_IPV4 */
					0x12, /* PKT_TX_IPV4 |
					       * PKT_TX_TCP_CKSUM
					       */
					0x22, /* PKT_TX_IPV4 |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x32, /* PKT_TX_IPV4 |
					       * PKT_TX_UDP_CKSUM
					       */
					0x03, /* PKT_TX_IPV4 |
					       * PKT_TX_IP_CKSUM
					       */
					0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1DF0020000000000,
				0x1DF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype and
			 * Bit 56:63 of oltype and place it in corresponding
			 * place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
						PKT_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate inverted
			 * mask as we need not make any changes to default cmd
			 * value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));

			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			const uint64_t lso_fmt = txq->lso_tun_fmt;
			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
			uint64_t sd_w1[NIX_DESCS_PER_LOOP];

			/* Extract SD W1 as we need to set L4 types. */
			vst1q_u64(sd_w1, senddesc01_w1);
			vst1q_u64(sd_w1 + 2, senddesc23_w1);

			/* Extract SX W0 as we need to set LSO fields. */
			vst1q_u64(sx_w0, sendext01_w0);
			vst1q_u64(sx_w0 + 2, sendext23_w0);

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Prepare individual mbufs. */
			cn10k_nix_prepare_tso(tx_pkts[0],
				(union nix_send_hdr_w1_u *)&sd_w1[0],
				(union nix_send_ext_w0_u *)&sx_w0[0],
				vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[1],
				(union nix_send_hdr_w1_u *)&sd_w1[1],
				(union nix_send_ext_w0_u *)&sx_w0[1],
				vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[2],
				(union nix_send_hdr_w1_u *)&sd_w1[2],
				(union nix_send_ext_w0_u *)&sx_w0[2],
				vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[3],
				(union nix_send_hdr_w1_u *)&sd_w1[3],
				(union nix_send_ext_w0_u *)&sx_w0[3],
				vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);

			senddesc01_w1 = vld1q_u64(sd_w1);
			senddesc23_w1 = vld1q_u64(sd_w1 + 2);

			sendext01_w0 = vld1q_u64(sx_w0);
			sendext23_w0 = vld1q_u64(sx_w0 + 2);
		}

		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
		    !(flags & NIX_TX_MULTI_SEG_F)) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else if (!(flags & NIX_TX_MULTI_SEG_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}
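
		/* vzip1q/vzip2q pair the per-field halves back into per-packet
		 * 16B command words: vzip1q_u64(a, b) = {a[0], b[0]} (packet 0
		 * of the pair) and vzip2q_u64(a, b) = {a[1], b[1]} (packet 1).
		 */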

		if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
							cmd2, cmd3, segdw,
							(uint64_t *)
							LMT_OFF(laddr, lnum,
								0),
							&wd.data128, &shift,
							flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	if (flags & NIX_TX_MULTI_SEG_F)
		wd.data[0] >>= 16;

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	/* Trigger LMTSTs for Tx */
	if (lnum > 16) {
		if (!(flags & NIX_TX_MULTI_SEG_F))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & NIX_TX_MULTI_SEG_F)
			wd.data[0] <<= 16;

		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & NIX_TX_MULTI_SEG_F))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;

		if (flags & NIX_TX_MULTI_SEG_F)
			wd.data[1] <<= 16;

		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & NIX_TX_MULTI_SEG_F))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & NIX_TX_MULTI_SEG_F)
			wd.data[0] <<= 16;

		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
							 scalar, cmd, base,
							 flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
						    cmd, base, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	RTE_SET_USED(base);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F

/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES                                                  \
T(no_offload, 0, 0, 0, 0, 0, 0, 4, NIX_TX_OFFLOAD_NONE)                        \
T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, L3L4CSUM_F)                                   \
T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, OL3OL4CSUM_F)                               \
T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, OL3OL4CSUM_F | L3L4CSUM_F)         \
T(vlan, 0, 0, 0, 1, 0, 0, 6, VLAN_F)                                           \
T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, VLAN_F | L3L4CSUM_F)                     \
T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, VLAN_F | OL3OL4CSUM_F)                 \
T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6,                               \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                            \
T(noff, 0, 0, 1, 0, 0, 0, 4, NOFF_F)                                           \
T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, NOFF_F | L3L4CSUM_F)                     \
T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, NOFF_F | OL3OL4CSUM_F)                 \
T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4,                               \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                            \
T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, NOFF_F | VLAN_F)                             \
T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, NOFF_F | VLAN_F | L3L4CSUM_F)       \
T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)   \
T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6,                          \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                   \
T(tso, 0, 1, 0, 0, 0, 0, 6, TSO_F)                                             \
T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, TSO_F | L3L4CSUM_F)                       \
T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, TSO_F | OL3OL4CSUM_F)                   \
T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6,                                \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                             \
T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, TSO_F | VLAN_F)                               \
T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, TSO_F | VLAN_F | L3L4CSUM_F)         \
T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)     \
T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6,                           \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(tso_noff, 0, 1, 1, 0, 0, 0, 6, TSO_F | NOFF_F)                               \
T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, TSO_F | NOFF_F | L3L4CSUM_F)         \
T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)     \
T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6,                           \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, TSO_F | NOFF_F | VLAN_F)                 \
T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6,                                 \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                          \
T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6,                               \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                        \
T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6,                      \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(ts, 1, 0, 0, 0, 0, 0, 8, TSP_F)                                              \
T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, TSP_F | L3L4CSUM_F)                        \
T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, TSP_F | OL3OL4CSUM_F)                    \
T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8,                                 \
		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                             \
T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, TSP_F | VLAN_F)                                \
T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, TSP_F | VLAN_F | L3L4CSUM_F)          \
T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)      \
T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8,                            \
		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(ts_noff, 1, 0, 1, 0, 0, 0, 8, TSP_F | NOFF_F)                                \
T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, TSP_F | NOFF_F | L3L4CSUM_F)          \
T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)      \
T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8,                            \
		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, TSP_F | NOFF_F | VLAN_F)                  \
T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8,                                  \
		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                          \
T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8,                                \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                        \
T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8,                       \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(ts_tso, 1, 1, 0, 0, 0, 0, 8, TSP_F | TSO_F)                                  \
T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, TSP_F | TSO_F | L3L4CSUM_F)            \
T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, TSP_F | TSO_F | OL3OL4CSUM_F)        \
T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8,                             \
		TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, TSP_F | TSO_F | VLAN_F)                    \
T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8,                                   \
		TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                           \
T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8,                                 \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                         \
T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8,                        \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, TSP_F | TSO_F | NOFF_F)                    \
T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8,                                   \
		TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                           \
T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8,                                 \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                         \
T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8,                        \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)      \
T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8,                              \
		TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                  \
T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8,                            \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8,                   \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
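
/* For instance, the vlan_l3l4csum row expands through T() into declarations
 * of cn10k_nix_xmit_pkts_vlan_l3l4csum plus its mseg/vec/vec_mseg variants,
 * each compiled with the constant flags VLAN_F | L3L4CSUM_F.
 */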

#endif /* __CN10K_TX_H__ */