/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>
#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
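
/* Each offload above is a compile-time flag so that every combination can
 * be specialized into its own fast-path function (see the
 * NIX_TX_FASTPATH_MODES table at the end of this file). For example, a
 * queue needing VLAN insertion plus inner checksum offload is served by the
 * variant built with NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_L3_L4_CSUM_F.
 */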

/* Flags to control xmit_prepare function.
 * Defined from the top bit downwards to denote that they are control flags,
 * not offload flags used to pick the fast-path function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, Update the fc_cache_pkts */            \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqe_per_sqb to express in pkts */     \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)

#define LMT_OFF(lmt_addr, lmt_num, offset)                                     \
	(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
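
/* Illustration: LMT_OFF() indexes into the per-core LMT region. Assuming
 * ROC_LMT_LINE_SIZE_LOG2 is 7 (128-byte LMT lines), LMT_OFF(base, 2, 16)
 * resolves to base + 2 * 128 + 16, i.e. the second 16-byte dword pair of
 * the third LMT line. The actual constants come from the ROC layer.
 */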

/* Function to determine the number of TX subdescriptors required when the
 * extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		       ? 2
		       : ((flags &
			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
				  ? 1
				  : 0);
}
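
/* Resulting subdescriptor counts, derived from the checks above:
 *   TSTAMP            -> 2 (SEND_EXT_S + SEND_MEM_S)
 *   VLAN/QINQ or TSO  -> 1 (SEND_EXT_S)
 *   otherwise         -> 0
 */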

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
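
/* Note on the encoding above: each per-line size field appears to be
 * expressed in 16-byte (2-dword) units minus one, e.g. a bare
 * SEND_HDR + SG command (4 dwords) encodes as 1, while an ext-header
 * vector line carrying two 6-dword commands (12 dwords) encodes as 5.
 */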

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags)
{
	/* Send hdr */
	cmd[0] = txq->send_hdr_w0;
	cmd[1] = 0;
	cmd += 2;

	/* Send ext if present */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
		cmd += 2;
	}

	/* Send sg */
	cmd[0] = txq->sg_w0;
	cmd[1] = 0;
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;
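
		/* The mask computed above is a branch-free select:
		 * -!!(x) is all-ones when any outer flag is set and zero
		 * otherwise, so outer header lengths are counted into
		 * lso_sb only for tunneled packets.
		 */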

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & PKT_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       PKT_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata +
						       m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & PKT_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
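
/* Worked example for the adjustment above: a TCP super packet with 14B L2,
 * 20B L3 and 20B L4 headers has lso_sb = 54 and paylen = pkt_len - 54.
 * Reducing the IP (and, for UDP tunnels, outer IP/UDP) length fields by
 * paylen leaves only the per-segment header contribution in the template;
 * hardware then accounts each segment's own payload when it emits it.
 */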

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
		       const uint16_t flags, const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		send_hdr->w0.total = m->data_len;
		send_hdr->w0.aura =
			roc_npa_aura_handle_to_aura(m->pool->pool_id);
	}

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

		/* When there is no tunnel header, shift the IL3/IL4 fields
		 * into the OL3/OL4 positions so that the single header is
		 * checksummed via the OL3/OL4 fields.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
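
		/* Example: for an untunneled packet (ol3type == 0, mask == 1)
		 * the IL3/IL4 pointers move down 16 bits and the IL3/IL4
		 * types move down 8 bits, landing exactly in the OL3/OL4
		 * pointer and type fields.
		 */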
	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);
	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}
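
	/* Both insertion pointers are 12, i.e. right after DMAC+SMAC; as
	 * noted above, hardware inserts VLAN0 (outer) first and then fixes
	 * up the VLAN1 pointer, yielding DMAC:SMAC:VLAN0:VLAN1 on the wire.
	 */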

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uint16_t lso_sb;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 * mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
	}

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be in stack. Intent is
	 * 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}
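
/* After cn10k_nix_xmit_prepare() the LMT line holds, in order: SEND_HDR_S
 * (16B), an optional SEND_EXT_S (16B), and the SEND_SG_S template plus the
 * first buffer pointer. Multi-seg and timestamp subdescriptors are appended
 * by the helpers below.
 */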

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
		struct nix_send_ext_s *send_hdr_ext =
			(struct nix_send_ext_s *)(lmt_addr + 16);
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
		send_hdr_ext->w0.tstmp = 1;
		if (flags & NIX_TX_MULTI_SEG_F) {
			/* Retrieving the default desc values */
			lmt[off] = cmd[2];

			/* Using compiler barrier to avoid violation of C
			 * aliasing rules.
			 */
			rte_compiler_barrier();
		}

		/* Packets for which PKT_TX_IEEE1588_TMST is not set, Tx tstamp
		 * should not be recorded. Hence the alg type is changed to
		 * NIX_SENDMEMALG_SET and the send mem addr field is moved to
		 * the next 8 bytes so that the actual Tx tstamp registered is
		 * not corrupted.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
	}
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[3 + off];

	i = 0;
	nb_segs = m->nb_segs;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= ((uint64_t)cnxk_nix_prefree_seg(m)
				 << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
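
/* Dword accounting example: a 3-segment chain consumes one SG header plus
 * three pointers, i.e. 4 dwords past &cmd[2 + off], which round up to 2
 * size units. Add 1 for SEND_HDR, 1 more when the ext header is present
 * (off == 2) and 1 more when a timestamp SEND_MEM follows, and sizem1 is
 * the total minus one.
 */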

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		    uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uintptr_t pa, lmt_addr = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uint64_t lso_tun_fmt;
	uint64_t data;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, 4, flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
	}

	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}
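
/* The scalar burst is capped at 32 packets, one LMT line each: bursts of up
 * to 16 lines go out in a single STEOR, larger bursts are split into STEOR0
 * (lines 0-15, lmt_id) and STEOR1 (the remainder, lmt_id + 16) as encoded
 * above.
 */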

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint64_t data0, data1;
	uint64_t lso_tun_fmt;
	__uint128_t data128;
	uint16_t shft;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, segdw,
					      flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		data128 |= (((__uint128_t)(segdw - 1)) << shft);
		shft += 3;
	}

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;

	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}

#if defined(RTE_ARCH_ARM64)

#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint16_t left, scalar, burst, i, lmt_id;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t laddr = txq->lmt_base;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint16_t lnum;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	senddesc23_w0 = senddesc01_w0;
	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
	sgdesc23_w0 = sgdesc01_w0;

	/* Load command defaults into vector variables. */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
		sendext23_w0 = sendext01_w0;
		sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		sendext23_w1 = sendext01_w1;
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
	left = pkts;
again:
	/* Number of packets to prepare depends on offloads enabled. */
	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
			      cn10k_nix_pkts_per_vec_brst(flags) :
			      left;
	lnum = 0;
	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbuf's, olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbufs to point to pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));

		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */
			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
				0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
				0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
				0x02, /* PKT_TX_IPV4 */
				0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
				0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
				0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
				0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_TCP_CKSUM
				       */
				0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_SCTP_CKSUM
				       */
				0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and rest
			 * of the bits zeroed.
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
					0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
					0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
					0x03, /* PKT_TX_IP_CKSUM */
					0x13, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
					0x02, /* PKT_TX_IPV4 */
					0x12, /* PKT_TX_IPV4 |
					       * PKT_TX_TCP_CKSUM
					       */
					0x22, /* PKT_TX_IPV4 |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x32, /* PKT_TX_IPV4 |
					       * PKT_TX_UDP_CKSUM
					       */
					0x03, /* PKT_TX_IPV4 |
					       * PKT_TX_IP_CKSUM
					       */
					0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1FF0020000000000,
				0x1FF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype and
			 * Bit 56:63 of oltype and place it in corresponding
			 * place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};

			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}

		if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
			lnum += 1;
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}
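
		/* Per 128-byte LMT line: with the ext header each packet
		 * needs three 16-byte stores (HDR, EXT, SG), so a line
		 * carries two packets (offsets 0-80); without it, two
		 * stores per packet let all four packets share one line.
		 */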

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Trigger LMTST */
	if (lnum > 16) {
		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(lnum - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (lnum) {
		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(lnum - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar))
		pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
					    flags);

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F

/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES \
T(no_offload, 0, 0, 0, 0, 0, 0, 4, \
	NIX_TX_OFFLOAD_NONE) \
T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, \
	L3L4CSUM_F) \
T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, \
	OL3OL4CSUM_F) \
T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, \
	OL3OL4CSUM_F | L3L4CSUM_F) \
T(vlan, 0, 0, 0, 1, 0, 0, 6, \
	VLAN_F) \
T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, \
	VLAN_F | L3L4CSUM_F) \
T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, \
	VLAN_F | OL3OL4CSUM_F) \
T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6, \
	VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(noff, 0, 0, 1, 0, 0, 0, 4, \
	NOFF_F) \
T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, \
	NOFF_F | L3L4CSUM_F) \
T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, \
	NOFF_F | OL3OL4CSUM_F) \
T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4, \
	NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, \
	NOFF_F | VLAN_F) \
T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, \
	NOFF_F | VLAN_F | L3L4CSUM_F) \
T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, \
	NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6, \
	NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso, 0, 1, 0, 0, 0, 0, 6, \
	TSO_F) \
T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, \
	TSO_F | L3L4CSUM_F) \
T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, \
	TSO_F | OL3OL4CSUM_F) \
T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6, \
	TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, \
	TSO_F | VLAN_F) \
T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, \
	TSO_F | VLAN_F | L3L4CSUM_F) \
T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, \
	TSO_F | VLAN_F | OL3OL4CSUM_F) \
T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6, \
	TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso_noff, 0, 1, 1, 0, 0, 0, 6, \
	TSO_F | NOFF_F) \
T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, \
	TSO_F | NOFF_F | L3L4CSUM_F) \
T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, \
	TSO_F | NOFF_F | OL3OL4CSUM_F) \
T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6, \
	TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, \
	TSO_F | NOFF_F | VLAN_F) \
T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6, \
	TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6, \
	TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6, \
	TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts, 1, 0, 0, 0, 0, 0, 8, \
	TSP_F) \
T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, \
	TSP_F | L3L4CSUM_F) \
T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, \
	TSP_F | OL3OL4CSUM_F) \
T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8, \
	TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, \
	TSP_F | VLAN_F) \
T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, \
	TSP_F | VLAN_F | L3L4CSUM_F) \
T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, \
	TSP_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8, \
	TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_noff, 1, 0, 1, 0, 0, 0, 8, \
	TSP_F | NOFF_F) \
T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, \
	TSP_F | NOFF_F | L3L4CSUM_F) \
T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, \
	TSP_F | NOFF_F | OL3OL4CSUM_F) \
T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8, \
	TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, \
	TSP_F | NOFF_F | VLAN_F) \
T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8, \
	TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8, \
	TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8, \
	TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso, 1, 1, 0, 0, 0, 0, 8, \
	TSP_F | TSO_F) \
T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, \
	TSP_F | TSO_F | L3L4CSUM_F) \
T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, \
	TSP_F | TSO_F | OL3OL4CSUM_F) \
T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8, \
	TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, \
	TSP_F | TSO_F | VLAN_F) \
T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8, \
	TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8, \
	TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8, \
	TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, \
	TSP_F | TSO_F | NOFF_F) \
T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8, \
	TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8, \
	TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8, \
	TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F) \
T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
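
/* NIX_TX_FASTPATH_MODES is an X-macro: expanded with the T() above it
 * declares the scalar, multi-seg and vector variants for every offload
 * combination, e.g. T(vlan_l3l4csum, ...) declares
 * cn10k_nix_xmit_pkts_vlan_l3l4csum() built with VLAN_F | L3L4CSUM_F
 * resolved at compile time. The table is expanded again, with a different
 * T(), wherever the definitions live.
 */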

#endif /* __CN10K_TX_H__ */