/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)

/* Flags to control the xmit_prepare function.
 * Defined from the MSB downwards to denote that they are
 * not offload flags used to pick the transmit function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)
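
/* Illustrative sketch (not part of the driver): the offload flags above are
 * combined into a compile-time constant that is passed as 'flags' to the
 * inline helpers below, so the compiler can prune untaken branches. A
 * VLAN + inner-checksum variant would be built from:
 *
 *	const uint16_t flags =
 *		NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F;
 *
 * Any 'if (flags & NIX_TX_OFFLOAD_TSO_F)' in the helpers then folds away.
 */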

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update fc_cache_pkts */                \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply by SQEs per SQB to express in pkts */      \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check again for room */                             \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
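
/* Worked example with hypothetical numbers: with nb_sqb_bufs_adj = 512,
 * *fc_mem = 200 SQBs consumed and sqes_per_sqb_log2 = 5 (32 SQEs per SQB),
 * the refreshed cache is (512 - 200) << 5 = 9984 packets worth of room, so
 * a request for, e.g., 32 packets passes without re-reading fc_mem.
 */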

#define LMT_OFF(lmt_addr, lmt_num, offset)                                     \
	(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
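
/* Sketch (assuming ROC_LMT_LINE_SIZE_LOG2 = 7, i.e. 128B LMT lines):
 * LMT_OFF(base, 2, 16) = base + (2 << 7) + 16, the second dword of the
 * third LMT line. The vector path below uses exactly this pattern to lay
 * out SEND_HDR/SEND_EXT/SEND_SG sub-descriptors within a line.
 */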

/* Function to determine the number of Tx sub-descriptors required when the
 * extension sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		       ? 2
		       : ((flags &
			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
				  ? 1
				  : 0);
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ?
			     ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
			     4;
}
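
/* Sketch of the dword math (assuming 128B LMT lines, 16 dwords each):
 * no ext hdr       -> SEND_HDR(2) + SEND_SG(2)  = 4 dwords
 * ext hdr          -> + SEND_EXT(2)             = 6 dwords
 * ext hdr + tstamp -> + SEND_MEM(2)             = 8 dwords
 * which is why cn10k_nix_pkts_per_vec_brst() packs 4 packets per line
 * without the extension header and 2 per line with it.
 */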

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags)
{
	/* Send hdr */
	cmd[0] = txq->send_hdr_w0;
	cmd[1] = 0;
	cmd += 2;

	/* Send ext if present */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
		cmd += 2;
	}

	/* Send sg */
	cmd[0] = txq->sg_w0;
	cmd[1] = 0;
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & PKT_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       PKT_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & PKT_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
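
/* Worked example with a hypothetical packet: Ethernet(14) + IPv4(20) +
 * TCP(20) with no tunnel gives lso_sb = 54, so for pkt_len = 1514 the
 * payload reduction is paylen = 1460. The IPv4 total-length field at
 * mdata + 14 + 2 is then rewritten from 1500 to 40, i.e. one segment's
 * header length, and hardware adds each segment's payload back on egress.
 */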

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
		       const uint16_t flags, const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		send_hdr->w0.total = m->data_len;
		send_hdr->w0.aura =
			roc_npa_aura_handle_to_aura(m->pool->pool_id);
	}

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 *
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

		/* In case of no tunnel header, shift the IL3/IL4 fields
		 * into the OL3/OL4 positions so that the lone header
		 * uses OL3/OL4 for its checksum
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Outer L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent mbuf
			 *	is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
	}

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be in stack. Intent is
	 * 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}
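
/* Encoding sketch for the W1 L3 type fields used above:
 * ol3type = (!!OUTER_IPV4 << 1) + (!!OUTER_IPV6 << 2) + !!OUTER_IP_CKSUM
 * yields 2 for plain IPv4, 3 for IPv4 with checksum and 4 for IPv6,
 * matching the L3type table in the comment above. E.g. a packet with
 * PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM gets ol3type = 2 + 1 = 3.
 */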

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
		struct nix_send_ext_s *send_hdr_ext =
			(struct nix_send_ext_s *)lmt_addr + 16;
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
		send_hdr_ext->w0.tstmp = 1;
		if (flags & NIX_TX_MULTI_SEG_F) {
			/* Retrieving the default desc values */
			lmt[off] = cmd[2];

			/* Using a compiler barrier to avoid violation of C
			 * aliasing rules.
			 */
			rte_compiler_barrier();
		}

		/* For packets for which PKT_TX_IEEE1588_TMST is not set, the
		 * Tx tstamp should not be recorded; hence change the alg type
		 * to NIX_SENDMEMALG_SET and also move the send mem addr field
		 * to the next 8 bytes so it does not corrupt the actual Tx
		 * tstamp registered address.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
	}
}
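
/* Selection sketch: is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST), so a
 * PTP packet keeps alg = NIX_SENDMEMALG_SETTSTMP and the real timestamp
 * address (cmd[3]), while a non-PTP packet gets the alg decremented by one
 * (per the comment above, NIX_SENDMEMALG_SET) and the address bumped by 8B,
 * landing the write in a scratch dword instead.
 */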

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[3 + off];

	i = 0;
	nb_segs = m->nb_segs;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
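
/* Worked example with a hypothetical mbuf chain: 3 segments with an ext
 * header (off = 2). The SG area holds 1 SG dword + 3 pointer dwords = 4
 * 8B words, so segdw = (4 >> 1) + (4 & 1) = 2 16B units, then
 * segdw += (2 >> 1) + 1 = 4 total (HDR + EXT + rounded SG area), and
 * sizem1 is set to 3.
 */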

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		    uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uintptr_t pa, lmt_addr = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uint64_t lso_tun_fmt;
	uint64_t data;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, 4, flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
	}

	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}
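
/* Burst-split sketch: one LMTST can launch at most 16 lines, so a burst of,
 * e.g., 20 packets issues STEOR0 with 16 lines (count field 15, base
 * lmt_id) and STEOR1 with the remaining 4 (count burst - 17 = 3,
 * lmt_id + 16). Bits 6:4 of the PA carry the first line's size, which is
 * why (data & 0x7) << 4 is folded into io_addr before submission.
 */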

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint64_t data0, data1;
	uint64_t lso_tun_fmt;
	__uint128_t data128;
	uint16_t shft;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, segdw,
					      flags);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		data128 |= (((__uint128_t)(segdw - 1)) << shft);
		shft += 3;
	}

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;
	/* Trigger LMTST */
	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}
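
/* Packing sketch for the multi-seg path: each packet's (segdw - 1) is a
 * 3-bit size accumulated into data128 at 'shft' (16, 19, 22, ...). The low
 * 64 bits seed STEOR0 and the high 64 bits, already aligned, seed STEOR1,
 * which is why data0 is shifted right by 16 to "make data0 similar to
 * data1" before the size bits are moved up to 63:19.
 */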

#if defined(RTE_ARCH_ARM64)

#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint16_t left, scalar, burst, i, lmt_id;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t laddr = txq->lmt_base;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint8_t lnum;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	senddesc23_w0 = senddesc01_w0;
	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
	sgdesc23_w0 = sgdesc01_w0;

	/* Load command defaults into vector variables. */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
		sendext23_w0 = sendext01_w0;
		sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		sendext23_w1 = sendext01_w1;
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
			sendmem23_w1 = sendmem01_w1;
		}
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
	left = pkts;
again:
	/* Number of packets to prepare depends on offloads enabled. */
	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
			      cn10k_nix_pkts_per_vec_brst(flags) :
			      left;
	lnum = 0;
	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB */
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbufs' olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbufs to point pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));

		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* OR both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
				0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
				0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
				0x02, /* PKT_TX_IPV4 */
				0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
				0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
				0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
				0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_TCP_CKSUM
				       */
				0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_SCTP_CKSUM
				       */
				0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and the rest
			 * of the bits ignored
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e. bits 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e. bits 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
					0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
					0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
					0x03, /* PKT_TX_IP_CKSUM */
					0x13, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
					0x02, /* PKT_TX_IPV4 */
					0x12, /* PKT_TX_IPV4 |
					       * PKT_TX_TCP_CKSUM
					       */
					0x22, /* PKT_TX_IPV4 |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x32, /* PKT_TX_IPV4 |
					       * PKT_TX_UDP_CKSUM
					       */
					0x03, /* PKT_TX_IPV4 |
					       * PKT_TX_IP_CKSUM
					       */
					0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1DF0020000000000,
				0x1DF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e. bits 48:55 of iltype
			 * and bits 56:63 of oltype and place them in the
			 * corresponding place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
						PKT_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate inverted
			 * mask as we need not make any changes to default cmd
			 * value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));

			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}
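
		/* Interleave sketch: vzip1q/vzip2q pair each packet's w0 with
		 * its w1, e.g. cmd0[0] above is packet 0's {SEND_HDR_W0,
		 * SEND_HDR_W1} 16B unit and the vzip2q result is packet 1's,
		 * so four packets' descriptors are assembled with two
		 * instructions per descriptor pair before being stored to the
		 * LMT lines below.
		 */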
		if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Trigger LMTST */
	if (lnum > 16) {
		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(lnum - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (lnum) {
		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(lnum - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar))
		pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
					    flags);

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F

/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES						       \
T(no_offload,				0, 0, 0, 0, 0, 0, 4,		       \
	NIX_TX_OFFLOAD_NONE)						       \
T(l3l4csum,				0, 0, 0, 0, 0, 1, 4,		       \
	L3L4CSUM_F)							       \
T(ol3ol4csum,				0, 0, 0, 0, 1, 0, 4,		       \
	OL3OL4CSUM_F)							       \
T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 1, 1, 4,		       \
	OL3OL4CSUM_F | L3L4CSUM_F)					       \
T(vlan,					0, 0, 0, 1, 0, 0, 6,		       \
	VLAN_F)								       \
T(vlan_l3l4csum,			0, 0, 0, 1, 0, 1, 6,		       \
	VLAN_F | L3L4CSUM_F)						       \
T(vlan_ol3ol4csum,			0, 0, 0, 1, 1, 0, 6,		       \
	VLAN_F | OL3OL4CSUM_F)						       \
T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 1, 1, 6,		       \
	VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)				       \
T(noff,					0, 0, 1, 0, 0, 0, 4,		       \
	NOFF_F)								       \
T(noff_l3l4csum,			0, 0, 1, 0, 0, 1, 4,		       \
	NOFF_F | L3L4CSUM_F)						       \
T(noff_ol3ol4csum,			0, 0, 1, 0, 1, 0, 4,		       \
	NOFF_F | OL3OL4CSUM_F)						       \
T(noff_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1, 4,		       \
	NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)				       \
T(noff_vlan,				0, 0, 1, 1, 0, 0, 6,		       \
	NOFF_F | VLAN_F)						       \
T(noff_vlan_l3l4csum,			0, 0, 1, 1, 0, 1, 6,		       \
	NOFF_F | VLAN_F | L3L4CSUM_F)					       \
T(noff_vlan_ol3ol4csum,			0, 0, 1, 1, 1, 0, 6,		       \
	NOFF_F | VLAN_F | OL3OL4CSUM_F)					       \
T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1, 6,		       \
	NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(tso,					0, 1, 0, 0, 0, 0, 6,		       \
	TSO_F)								       \
T(tso_l3l4csum,				0, 1, 0, 0, 0, 1, 6,		       \
	TSO_F | L3L4CSUM_F)						       \
T(tso_ol3ol4csum,			0, 1, 0, 0, 1, 0, 6,		       \
	TSO_F | OL3OL4CSUM_F)						       \
T(tso_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1, 6,		       \
	TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)				       \
T(tso_vlan,				0, 1, 0, 1, 0, 0, 6,		       \
	TSO_F | VLAN_F)							       \
T(tso_vlan_l3l4csum,			0, 1, 0, 1, 0, 1, 6,		       \
	TSO_F | VLAN_F | L3L4CSUM_F)					       \
T(tso_vlan_ol3ol4csum,			0, 1, 0, 1, 1, 0, 6,		       \
	TSO_F | VLAN_F | OL3OL4CSUM_F)					       \
T(tso_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 1, 6,		       \
	TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(tso_noff,				0, 1, 1, 0, 0, 0, 6,		       \
	TSO_F | NOFF_F)							       \
T(tso_noff_l3l4csum,			0, 1, 1, 0, 0, 1, 6,		       \
	TSO_F | NOFF_F | L3L4CSUM_F)					       \
T(tso_noff_ol3ol4csum,			0, 1, 1, 0, 1, 0, 6,		       \
	TSO_F | NOFF_F | OL3OL4CSUM_F)					       \
T(tso_noff_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 1, 1, 6,		       \
	TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(tso_noff_vlan,			0, 1, 1, 1, 0, 0, 6,		       \
	TSO_F | NOFF_F | VLAN_F)					       \
T(tso_noff_vlan_l3l4csum,		0, 1, 1, 1, 0, 1, 6,		       \
	TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)				       \
T(tso_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 0, 6,		       \
	TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)				       \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1, 6,		       \
	TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts,					1, 0, 0, 0, 0, 0, 8,		       \
	TSP_F)								       \
T(ts_l3l4csum,				1, 0, 0, 0, 0, 1, 8,		       \
	TSP_F | L3L4CSUM_F)						       \
T(ts_ol3ol4csum,			1, 0, 0, 0, 1, 0, 8,		       \
	TSP_F | OL3OL4CSUM_F)						       \
T(ts_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1, 8,		       \
	TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)				       \
T(ts_vlan,				1, 0, 0, 1, 0, 0, 8,		       \
	TSP_F | VLAN_F)							       \
T(ts_vlan_l3l4csum,			1, 0, 0, 1, 0, 1, 8,		       \
	TSP_F | VLAN_F | L3L4CSUM_F)					       \
T(ts_vlan_ol3ol4csum,			1, 0, 0, 1, 1, 0, 8,		       \
	TSP_F | VLAN_F | OL3OL4CSUM_F)					       \
T(ts_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 1, 8,		       \
	TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(ts_noff,				1, 0, 1, 0, 0, 0, 8,		       \
	TSP_F | NOFF_F)							       \
T(ts_noff_l3l4csum,			1, 0, 1, 0, 0, 1, 8,		       \
	TSP_F | NOFF_F | L3L4CSUM_F)					       \
T(ts_noff_ol3ol4csum,			1, 0, 1, 0, 1, 0, 8,		       \
	TSP_F | NOFF_F | OL3OL4CSUM_F)					       \
T(ts_noff_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 1, 1, 8,		       \
	TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(ts_noff_vlan,				1, 0, 1, 1, 0, 0, 8,		       \
	TSP_F | NOFF_F | VLAN_F)					       \
T(ts_noff_vlan_l3l4csum,		1, 0, 1, 1, 0, 1, 8,		       \
	TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)				       \
T(ts_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 0, 8,		       \
	TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)				       \
T(ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1, 1, 8,		       \
	TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts_tso,				1, 1, 0, 0, 0, 0, 8,		       \
	TSP_F | TSO_F)							       \
T(ts_tso_l3l4csum,			1, 1, 0, 0, 0, 1, 8,		       \
	TSP_F | TSO_F | L3L4CSUM_F)					       \
T(ts_tso_ol3ol4csum,			1, 1, 0, 0, 1, 0, 8,		       \
	TSP_F | TSO_F | OL3OL4CSUM_F)					       \
T(ts_tso_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 1, 1, 8,		       \
	TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(ts_tso_vlan,				1, 1, 0, 1, 0, 0, 8,		       \
	TSP_F | TSO_F | VLAN_F)						       \
T(ts_tso_vlan_l3l4csum,			1, 1, 0, 1, 0, 1, 8,		       \
	TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)				       \
T(ts_tso_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 0, 8,		       \
	TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)				       \
T(ts_tso_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1, 8,		       \
	TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts_tso_noff,				1, 1, 1, 0, 0, 0, 8,		       \
	TSP_F | TSO_F | NOFF_F)						       \
T(ts_tso_noff_l3l4csum,			1, 1, 1, 0, 0, 1, 8,		       \
	TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)				       \
T(ts_tso_noff_ol3ol4csum,		1, 1, 1, 0, 1, 0, 8,		       \
	TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)				       \
T(ts_tso_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1, 8,		       \
	TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts_tso_noff_vlan,			1, 1, 1, 1, 0, 0, 8,		       \
	TSP_F | TSO_F | NOFF_F | VLAN_F)				       \
T(ts_tso_noff_vlan_l3l4csum,		1, 1, 1, 1, 0, 1, 8,		       \
	TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)			       \
T(ts_tso_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 1, 0, 8,		       \
	TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			       \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1, 8,		       \
	TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
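
/* Expansion sketch: for the 'vlan' row above, T() declares
 *
 *	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vlan(
 *		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
 *
 * plus the _mseg_ and _vec_ variants. The matching definitions (in the
 * driver's .c files) are expected to call the inline senders with
 * flags = VLAN_F, so each variant is compiled with a distinct
 * constant-folded offload set.
 */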

NIX_TX_FASTPATH_MODES
#undef T

#endif /* __CN10K_TX_H__ */