1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #define NIX_TX_OFFLOAD_NONE (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
17 /* Flags to control xmit_prepare function.
18 * Defined from the MSB end to denote that these are not
19 * used as offload flags to pick the function.
21 #define NIX_TX_MULTI_SEG_F BIT(15)
23 #define NIX_TX_NEED_SEND_HDR_W1 \
24 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
25 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
27 #define NIX_TX_NEED_EXT_HDR \
28 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
31 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
33 /* Cached value is low, update the fc_cache_pkts */ \
34 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
35 /* Multiply with sqe_per_sqb to express in pkts */ \
36 (txq)->fc_cache_pkts = \
37 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
38 << (txq)->sqes_per_sqb_log2; \
39 /* Check it again for the room */ \
40 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
45 #define LMT_OFF(lmt_addr, lmt_num, offset) \
46 (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
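/* Worked example (a sketch, assuming 128-byte LMT lines, i.e.
 * ROC_LMT_LINE_SIZE_LOG2 == 7): LMT_OFF(base, 2, 16) resolves to
 * base + (2 << 7) + 16 = base + 272, i.e. byte offset 16 (the
 * SEND_EXT slot) within the third LMT line of this core.
 */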
48 /* Function to determine the number of Tx subdescriptors required
49 * when the extended subdescriptor is enabled.
51 static __rte_always_inline int
52 cn10k_nix_tx_ext_subs(const uint16_t flags)
54 return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
57 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
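/* Sketch of the resulting mapping (values assumed from the subdescriptor
 * layout rather than restated from this excerpt): TSTAMP needs both
 * SEND_EXT and SEND_MEM subdescriptors (2), VLAN/QinQ or TSO need only
 * SEND_EXT (1), and any other combination needs none (0).
 */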
62 static __rte_always_inline uint8_t
63 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
65 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
66 << ROC_LMT_LINES_PER_CORE_LOG2;
69 static __rte_always_inline uint8_t
70 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
72 return (flags & NIX_TX_NEED_EXT_HDR) ?
73 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
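/* Worked example (sketch): every subdescriptor is 2 dwords, so a packet
 * built as SEND_HDR + SEND_EXT + SG takes 6 dwords per line, and adding
 * SEND_MEM for the timestamp takes 8, matching the 8 : 6 values returned
 * above for the extended-header cases.
 */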
77 static __rte_always_inline uint64_t
78 cn10k_nix_tx_steor_data(const uint16_t flags)
80 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
83 /* This will be moved to addr area */
85 /* 15 vector sizes for single seg */
105 static __rte_always_inline uint64_t
106 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
108 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
111 /* This will be moved to addr area */
113 /* 15 vector sizes for single seg */
133 static __rte_always_inline void
134 cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
135 const uint16_t flags)
138 cmd[0] = txq->send_hdr_w0;
142 /* Send ext if present */
143 if (flags & NIX_TX_NEED_EXT_HDR) {
144 *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
153 static __rte_always_inline void
154 cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
156 uint64_t mask, ol_flags = m->ol_flags;
158 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
159 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
160 uint16_t *iplen, *oiplen, *oudplen;
161 uint16_t lso_sb, paylen;
163 mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
164 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
165 m->l2_len + m->l3_len + m->l4_len;
167 /* Reduce payload len from base headers */
168 paylen = m->pkt_len - lso_sb;
170 /* Get iplen position assuming no tunnel hdr */
171 iplen = (uint16_t *)(mdata + m->l2_len +
172 (2 << !!(ol_flags & PKT_TX_IPV6)));
173 /* Handle tunnel tso */
174 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
175 (ol_flags & PKT_TX_TUNNEL_MASK)) {
176 const uint8_t is_udp_tun =
177 (CNXK_NIX_UDP_TUN_BITMASK >>
178 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
181 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
183 PKT_TX_OUTER_IPV6)));
184 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
187 /* Update format for UDP tunneled packet */
189 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
190 m->outer_l3_len + 4);
191 *oudplen = rte_cpu_to_be_16(
192 rte_be_to_cpu_16(*oudplen) - paylen);
195 /* Update iplen position to inner ip hdr */
196 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
198 (2 << !!(ol_flags & PKT_TX_IPV6)));
201 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
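/* Worked example (sketch, hypothetical sizes): a plain TCP/IPv4 TSO mbuf
 * with l2_len = 14, l3_len = 20, l4_len = 20 and pkt_len = 1514 gives
 * lso_sb = 54 and paylen = 1460, so the IP total length field is reduced
 * by 1460 and ends up covering only the headers that hardware will
 * replicate for every segment.
 */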
205 static __rte_always_inline void
206 cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
207 const uint16_t flags, const uint64_t lso_tun_fmt)
209 struct nix_send_ext_s *send_hdr_ext;
210 struct nix_send_hdr_s *send_hdr;
211 uint64_t ol_flags = 0, mask;
212 union nix_send_hdr_w1_u w1;
213 union nix_send_sg_s *sg;
215 send_hdr = (struct nix_send_hdr_s *)cmd;
216 if (flags & NIX_TX_NEED_EXT_HDR) {
217 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
218 sg = (union nix_send_sg_s *)(cmd + 4);
219 /* Clear previous markings */
220 send_hdr_ext->w0.lso = 0;
221 send_hdr_ext->w1.u = 0;
223 sg = (union nix_send_sg_s *)(cmd + 2);
226 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
227 ol_flags = m->ol_flags;
231 if (!(flags & NIX_TX_MULTI_SEG_F)) {
232 send_hdr->w0.total = m->data_len;
234 roc_npa_aura_handle_to_aura(m->pool->pool_id);
239 * 3 => IPV4 with csum
241 * L3type and L3ptr need to be set for either
242 * L3 csum or L4 csum or LSO
246 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
247 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
248 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
249 const uint8_t ol3type =
250 ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
251 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
252 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
255 w1.ol3type = ol3type;
256 mask = 0xffffull << ((!!ol3type) << 4);
257 w1.ol3ptr = ~mask & m->outer_l2_len;
258 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
261 w1.ol4type = csum + (csum << 1);
264 w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
265 ((!!(ol_flags & PKT_TX_IPV6)) << 2);
266 w1.il3ptr = w1.ol4ptr + m->l2_len;
267 w1.il4ptr = w1.il3ptr + m->l3_len;
268 /* Increment it by 1 if it is IPV4 as 3 is with csum */
269 w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);
272 w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
274 /* When there is no tunnel header, shift the
275 * IL3/IL4 fields down so that OL3/OL4 carry
276 * the header checksum info
279 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
280 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
282 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
283 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
284 const uint8_t outer_l2_len = m->outer_l2_len;
287 w1.ol3ptr = outer_l2_len;
288 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
289 /* Increment it by 1 if it is IPV4 as 3 is with csum */
290 w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
291 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
292 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
295 w1.ol4type = csum + (csum << 1);
297 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
298 const uint8_t l2_len = m->l2_len;
300 /* Always use OLXPTR and OLXTYPE when only
301 * one header is present
306 w1.ol4ptr = l2_len + m->l3_len;
307 /* Increment it by 1 if it is IPV4 as 3 is with csum */
308 w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
309 ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
310 !!(ol_flags & PKT_TX_IP_CKSUM);
313 w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
316 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
317 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
318 /* HW will update ptr after vlan0 update */
319 send_hdr_ext->w1.vlan1_ins_ptr = 12;
320 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
322 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
323 /* 2B before end of l2 header */
324 send_hdr_ext->w1.vlan0_ins_ptr = 12;
325 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
328 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
332 mask = -(!w1.il3type);
333 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
335 send_hdr_ext->w0.lso_sb = lso_sb;
336 send_hdr_ext->w0.lso = 1;
337 send_hdr_ext->w0.lso_mps = m->tso_segsz;
338 send_hdr_ext->w0.lso_format =
339 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
340 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
342 /* Handle tunnel tso */
343 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
344 (ol_flags & PKT_TX_TUNNEL_MASK)) {
345 const uint8_t is_udp_tun =
346 (CNXK_NIX_UDP_TUN_BITMASK >>
347 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
349 uint8_t shift = is_udp_tun ? 32 : 0;
351 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
352 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
354 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
355 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
356 /* Update format for UDP tunneled packet */
357 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
361 if (flags & NIX_TX_NEED_SEND_HDR_W1)
362 send_hdr->w1.u = w1.u;
364 if (!(flags & NIX_TX_MULTI_SEG_F)) {
365 sg->seg1_size = m->data_len;
366 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
368 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
369 /* DF bit = 1 if refcount of current mbuf or parent mbuf is > 1,
371 * DF bit = 0 otherwise
373 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
375 /* Mark mempool object as "put" since it is freed by NIX */
376 if (!send_hdr->w0.df)
377 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
380 /* With minimal offloads, 'cmd' being local could be optimized out to
381 * registers. In other cases, 'cmd' will be on the stack. The intent is
382 * that 'cmd' stores content from txq->cmd, which is copied only once.
384 *((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
386 if (flags & NIX_TX_NEED_EXT_HDR) {
387 *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
390 /* In case of multi-seg, sg template is stored here */
391 *((union nix_send_sg_s *)lmt_addr) = *sg;
392 *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
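/* Resulting LMT line layout for the single-segment case (sketch): dwords
 * 0-1 hold SEND_HDR, dwords 2-3 hold SEND_EXT when NIX_TX_NEED_EXT_HDR is
 * set, and the next two dwords hold the SG subdescriptor and the buffer
 * iova copied just above.
 */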
395 static __rte_always_inline void
396 cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
397 const uint64_t ol_flags, const uint16_t no_segdw,
398 const uint16_t flags)
400 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
401 const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
402 struct nix_send_ext_s *send_hdr_ext =
403 (struct nix_send_ext_s *)(lmt_addr + 16);
404 uint64_t *lmt = (uint64_t *)lmt_addr;
405 uint16_t off = (no_segdw - 1) << 1;
406 struct nix_send_mem_s *send_mem;
408 send_mem = (struct nix_send_mem_s *)(lmt + off);
409 send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
410 send_hdr_ext->w0.tstmp = 1;
411 if (flags & NIX_TX_MULTI_SEG_F) {
412 /* Retrieving the default desc values */
415 /* Using a compiler barrier to avoid violation of C
418 rte_compiler_barrier();
421 /* For packets where PKT_TX_IEEE1588_TMST is not set, the Tx tstamp
422 * should not be recorded; hence change the alg type to
423 * NIX_SENDMEMALG_SET and move the send mem addr field to the
424 * next 8 bytes so the actual registered Tx tstamp is not corrupted
427 send_mem->w0.subdc = NIX_SUBDC_MEM;
428 send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
430 (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
434 static __rte_always_inline uint16_t
435 cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
437 struct nix_send_hdr_s *send_hdr;
438 union nix_send_sg_s *sg;
439 struct rte_mbuf *m_next;
440 uint64_t *slist, sg_u;
445 send_hdr = (struct nix_send_hdr_s *)cmd;
446 send_hdr->w0.total = m->pkt_len;
447 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
449 if (flags & NIX_TX_NEED_EXT_HDR)
454 sg = (union nix_send_sg_s *)&cmd[2 + off];
455 /* Clear sg->u header before use */
456 sg->u &= 0xFC00000000000000;
458 slist = &cmd[3 + off];
461 nb_segs = m->nb_segs;
463 /* Fill mbuf segments */
466 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
467 *slist = rte_mbuf_data_iova(m);
468 /* Set invert df if buffer is not to be freed by H/W */
469 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
470 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
471 /* Mark mempool object as "put" since it is freed by NIX
473 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
474 if (!(sg_u & (1ULL << (i + 55))))
475 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
480 if (i > 2 && nb_segs) {
482 /* Next SG subdesc */
483 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
486 sg = (union nix_send_sg_s *)slist;
495 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
496 /* Roundup extra dwords to multiple of 2 */
497 segdw = (segdw >> 1) + (segdw & 0x1);
499 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
500 send_hdr->w0.sizem1 = segdw - 1;
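/* Worked example (sketch): a 3-segment chain without extended header or
 * timestamp uses 1 SG dword + 3 iova dwords = 4 dwords, rounded up to
 * 2 16B units, plus 1 unit for SEND_HDR, giving segdw = 3 and
 * w0.sizem1 = 2.
 */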
505 static __rte_always_inline uint16_t
506 cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
507 uint64_t *cmd, const uint16_t flags)
509 struct cn10k_eth_txq *txq = tx_queue;
510 const rte_iova_t io_addr = txq->io_addr;
511 uintptr_t pa, lmt_addr = txq->lmt_base;
512 uint16_t lmt_id, burst, left, i;
513 uint64_t lso_tun_fmt;
516 NIX_XMIT_FC_OR_RETURN(txq, pkts);
518 /* Get cmd skeleton */
519 cn10k_nix_tx_skeleton(txq, cmd, flags);
521 /* Reduce the cached count */
522 txq->fc_cache_pkts -= pkts;
524 if (flags & NIX_TX_OFFLOAD_TSO_F)
525 lso_tun_fmt = txq->lso_tun_fmt;
527 /* Get LMT base address and LMT ID as lcore id */
528 ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
531 burst = left > 32 ? 32 : left;
532 for (i = 0; i < burst; i++) {
533 /* Perform header writes for TSO, barrier at
534 * lmt steorl will suffice.
536 if (flags & NIX_TX_OFFLOAD_TSO_F)
537 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
539 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
541 cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
542 tx_pkts[i]->ol_flags, 4, flags);
543 lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
548 data = cn10k_nix_tx_steor_data(flags);
549 pa = io_addr | (data & 0x7) << 4;
551 data |= (15ULL << 12);
552 data |= (uint64_t)lmt_id;
555 roc_lmt_submit_steorl(data, pa);
557 data = cn10k_nix_tx_steor_data(flags);
558 pa = io_addr | (data & 0x7) << 4;
560 data |= ((uint64_t)(burst - 17)) << 12;
561 data |= (uint64_t)(lmt_id + 16);
564 roc_lmt_submit_steorl(data, pa);
566 data = cn10k_nix_tx_steor_data(flags);
567 pa = io_addr | (data & 0x7) << 4;
569 data |= ((uint64_t)(burst - 1)) << 12;
573 roc_lmt_submit_steorl(data, pa);
579 /* Start processing another burst */
581 /* Reset lmt base addr */
582 lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
583 lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
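/* Sketch of the LMTST submit encoding used above (field positions as
 * implied by the shifts, not restated from a manual): the first line's
 * size-minus-one travels in I/O address bits 6:4, the starting LMT ID sits
 * in the low bits of 'data', the burst's line count minus one is placed at
 * bit 12 (15 for a full 16-line burst), and the sizes of lines 1..15
 * occupy bits 63:19.
 */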
590 static __rte_always_inline uint16_t
591 cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
592 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
594 struct cn10k_eth_txq *txq = tx_queue;
595 uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
596 const rte_iova_t io_addr = txq->io_addr;
597 uint16_t segdw, lmt_id, burst, left, i;
598 uint64_t data0, data1;
599 uint64_t lso_tun_fmt;
603 NIX_XMIT_FC_OR_RETURN(txq, pkts);
605 cn10k_nix_tx_skeleton(txq, cmd, flags);
607 /* Reduce the cached count */
608 txq->fc_cache_pkts -= pkts;
610 if (flags & NIX_TX_OFFLOAD_TSO_F)
611 lso_tun_fmt = txq->lso_tun_fmt;
613 /* Get LMT base address and LMT ID as lcore id */
614 ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
617 burst = left > 32 ? 32 : left;
620 for (i = 0; i < burst; i++) {
621 /* Perform header writes for TSO, barrier at
622 * lmt steorl will suffice.
624 if (flags & NIX_TX_OFFLOAD_TSO_F)
625 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
627 cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
629 /* Store sg list directly on lmt line */
630 segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
632 cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
633 tx_pkts[i]->ol_flags, segdw,
635 lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
636 data128 |= (((__uint128_t)(segdw - 1)) << shft);
640 data0 = (uint64_t)data128;
641 data1 = (uint64_t)(data128 >> 64);
642 /* Make data0 similar to data1 */
646 pa0 = io_addr | (data0 & 0x7) << 4;
648 /* Move lmtst1..15 sz to bits 63:19 */
650 data0 |= (15ULL << 12);
651 data0 |= (uint64_t)lmt_id;
654 roc_lmt_submit_steorl(data0, pa0);
656 pa1 = io_addr | (data1 & 0x7) << 4;
659 data1 |= ((uint64_t)(burst - 17)) << 12;
660 data1 |= (uint64_t)(lmt_id + 16);
663 roc_lmt_submit_steorl(data1, pa1);
665 pa0 = io_addr | (data0 & 0x7) << 4;
667 /* Move lmtst1..15 sz to bits 63:19 */
669 data0 |= ((burst - 1) << 12);
670 data0 |= (uint64_t)lmt_id;
673 roc_lmt_submit_steorl(data0, pa0);
679 /* Start processing another burst */
681 /* Reset lmt base addr */
682 lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
683 lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
690 #if defined(RTE_ARCH_ARM64)
692 static __rte_always_inline void
693 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
694 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
695 const uint64_t flags, const uint64_t lso_tun_fmt)
700 if (!(ol_flags & PKT_TX_TCP_SEG))
703 mask = -(!w1->il3type);
704 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
708 w0->lso_mps = m->tso_segsz;
709 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
710 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
712 /* Handle tunnel tso */
713 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
714 (ol_flags & PKT_TX_TUNNEL_MASK)) {
715 const uint8_t is_udp_tun =
716 (CNXK_NIX_UDP_TUN_BITMASK >>
717 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
719 uint8_t shift = is_udp_tun ? 32 : 0;
721 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
722 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
724 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
725 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
726 /* Update format for UDP tunneled packet */
728 w0->lso_format = (lso_tun_fmt >> shift);
732 #define NIX_DESCS_PER_LOOP 4
733 static __rte_always_inline uint16_t
734 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
735 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
737 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
738 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
739 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
740 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
741 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
742 uint64x2_t senddesc01_w0, senddesc23_w0;
743 uint64x2_t senddesc01_w1, senddesc23_w1;
744 uint16_t left, scalar, burst, i, lmt_id;
745 uint64x2_t sendext01_w0, sendext23_w0;
746 uint64x2_t sendext01_w1, sendext23_w1;
747 uint64x2_t sendmem01_w0, sendmem23_w0;
748 uint64x2_t sendmem01_w1, sendmem23_w1;
749 uint64x2_t sgdesc01_w0, sgdesc23_w0;
750 uint64x2_t sgdesc01_w1, sgdesc23_w1;
751 struct cn10k_eth_txq *txq = tx_queue;
752 uintptr_t laddr = txq->lmt_base;
753 rte_iova_t io_addr = txq->io_addr;
754 uint64x2_t ltypes01, ltypes23;
755 uint64x2_t xtmp128, ytmp128;
756 uint64x2_t xmask01, xmask23;
759 NIX_XMIT_FC_OR_RETURN(txq, pkts);
761 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
762 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
764 /* Reduce the cached count */
765 txq->fc_cache_pkts -= pkts;
766 /* Perform header writes before barrier for TSO */
767 if (flags & NIX_TX_OFFLOAD_TSO_F) {
768 for (i = 0; i < pkts; i++)
769 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
772 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
773 senddesc23_w0 = senddesc01_w0;
774 senddesc01_w1 = vdupq_n_u64(0);
775 senddesc23_w1 = senddesc01_w1;
776 sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
777 sgdesc23_w0 = sgdesc01_w0;
779 /* Load command defaults into vector variables. */
780 if (flags & NIX_TX_NEED_EXT_HDR) {
781 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
782 sendext23_w0 = sendext01_w0;
783 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
784 sendext23_w1 = sendext01_w1;
785 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
786 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
787 sendmem23_w0 = sendmem01_w0;
788 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
789 sendmem23_w1 = sendmem01_w1;
793 /* Get LMT base address and LMT ID as lcore id */
794 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
797 /* Number of packets to prepare depends on offloads enabled. */
798 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
799 cn10k_nix_pkts_per_vec_brst(flags) :
802 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
803 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
805 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
806 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
808 senddesc23_w0 = senddesc01_w0;
809 sgdesc23_w0 = sgdesc01_w0;
811 /* Clear vlan enables. */
812 if (flags & NIX_TX_NEED_EXT_HDR) {
813 sendext01_w1 = vbicq_u64(sendext01_w1,
814 vdupq_n_u64(0x3FFFF00FFFF00));
815 sendext23_w1 = sendext01_w1;
818 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
819 /* Reset send mem alg to SETTSTMP from SUB */
820 sendmem01_w0 = vbicq_u64(sendmem01_w0,
821 vdupq_n_u64(BIT_ULL(59)));
822 /* Reset send mem address to default. */
824 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
825 sendmem23_w0 = sendmem01_w0;
826 sendmem23_w1 = sendmem01_w1;
829 if (flags & NIX_TX_OFFLOAD_TSO_F) {
830 /* Clear the LSO enable bit. */
831 sendext01_w0 = vbicq_u64(sendext01_w0,
832 vdupq_n_u64(BIT_ULL(14)));
833 sendext23_w0 = sendext01_w0;
836 /* Move mbufs to iova */
837 mbuf0 = (uint64_t *)tx_pkts[0];
838 mbuf1 = (uint64_t *)tx_pkts[1];
839 mbuf2 = (uint64_t *)tx_pkts[2];
840 mbuf3 = (uint64_t *)tx_pkts[3];
842 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
843 offsetof(struct rte_mbuf, buf_iova));
844 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
845 offsetof(struct rte_mbuf, buf_iova));
846 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
847 offsetof(struct rte_mbuf, buf_iova));
848 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
849 offsetof(struct rte_mbuf, buf_iova));
851 * Get each mbuf's ol_flags, iova, pktlen and dataoff:
852 * dataoff_iovaX.D[0] = iova,
853 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
854 * len_olflagsX.D[0] = ol_flags,
855 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
857 dataoff_iova0 = vld1q_u64(mbuf0);
858 len_olflags0 = vld1q_u64(mbuf0 + 2);
859 dataoff_iova1 = vld1q_u64(mbuf1);
860 len_olflags1 = vld1q_u64(mbuf1 + 2);
861 dataoff_iova2 = vld1q_u64(mbuf2);
862 len_olflags2 = vld1q_u64(mbuf2 + 2);
863 dataoff_iova3 = vld1q_u64(mbuf3);
864 len_olflags3 = vld1q_u64(mbuf3 + 2);
866 /* Move mbufs to point to pool */
867 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
868 offsetof(struct rte_mbuf, pool) -
869 offsetof(struct rte_mbuf, buf_iova));
870 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
871 offsetof(struct rte_mbuf, pool) -
872 offsetof(struct rte_mbuf, buf_iova));
873 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
874 offsetof(struct rte_mbuf, pool) -
875 offsetof(struct rte_mbuf, buf_iova));
876 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
877 offsetof(struct rte_mbuf, pool) -
878 offsetof(struct rte_mbuf, buf_iova));
880 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
881 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
882 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
884 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
885 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
888 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
889 : [a] "+w"(senddesc01_w1)
890 : [in] "r"(mbuf0 + 2)
893 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
894 : [a] "+w"(senddesc01_w1)
895 : [in] "r"(mbuf1 + 2)
898 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
899 : [b] "+w"(senddesc23_w1)
900 : [in] "r"(mbuf2 + 2)
903 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
904 : [b] "+w"(senddesc23_w1)
905 : [in] "r"(mbuf3 + 2)
908 /* Get pool pointer alone */
909 mbuf0 = (uint64_t *)*mbuf0;
910 mbuf1 = (uint64_t *)*mbuf1;
911 mbuf2 = (uint64_t *)*mbuf2;
912 mbuf3 = (uint64_t *)*mbuf3;
914 /* Get pool pointer alone */
915 mbuf0 = (uint64_t *)*mbuf0;
916 mbuf1 = (uint64_t *)*mbuf1;
917 mbuf2 = (uint64_t *)*mbuf2;
918 mbuf3 = (uint64_t *)*mbuf3;
921 const uint8x16_t shuf_mask2 = {
922 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
923 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
925 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
926 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
928 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
929 const uint64x2_t and_mask0 = {
934 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
935 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
936 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
937 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
940 * Pick only 16 bits of pktlen present at bits 63:32
941 * and place them at bits 15:0.
943 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
944 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
946 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
947 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
948 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
950 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
951 * pktlen at 15:0 position.
953 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
954 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
955 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
956 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
958 /* Move mbuf to point to pool_id. */
959 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
960 offsetof(struct rte_mempool, pool_id));
961 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
962 offsetof(struct rte_mempool, pool_id));
963 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
964 offsetof(struct rte_mempool, pool_id));
965 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
966 offsetof(struct rte_mempool, pool_id));
968 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
969 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
971 * Lookup table to translate ol_flags to
972 * il3/il4 types. But we still use ol3/ol4 types in
973 * senddesc_w1 as only one header processing is enabled.
975 const uint8x16_t tbl = {
976 /* [0-15] = il4type:il3type */
977 0x04, /* none (IPv6 assumed) */
978 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
979 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
980 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
981 0x03, /* PKT_TX_IP_CKSUM */
982 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
983 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
984 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
985 0x02, /* PKT_TX_IPV4 */
986 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
987 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
988 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
989 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
990 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
993 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
996 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1001 /* Extract olflags to translate to iltypes */
1002 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1003 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1006 * E(47):L3_LEN(9):L2_LEN(7+z)
1007 * E(47):L3_LEN(9):L2_LEN(7+z)
1009 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1010 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1012 /* Move OLFLAGS bits 55:52 to 51:48
1013 * with zeros prepended on the byte and rest
1016 xtmp128 = vshrq_n_u8(xtmp128, 4);
1017 ytmp128 = vshrq_n_u8(ytmp128, 4);
1019 * E(48):L3_LEN(8):L2_LEN(z+7)
1020 * E(48):L3_LEN(8):L2_LEN(z+7)
1022 const int8x16_t tshft3 = {
1023 -1, 0, 8, 8, 8, 8, 8, 8,
1024 -1, 0, 8, 8, 8, 8, 8, 8,
1027 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1028 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1031 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1032 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1034 /* Pick only relevant fields i.e Bit 48:55 of iltype
1035 * and place it in ol3/ol4type of senddesc_w1
1037 const uint8x16_t shuf_mask0 = {
1038 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1039 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1042 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1043 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1045 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1046 * a [E(32):E(16):OL3(8):OL2(8)]
1048 * a [E(32):E(16):(OL3+OL2):OL2]
1049 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1051 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1052 vshlq_n_u16(senddesc01_w1, 8));
1053 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1054 vshlq_n_u16(senddesc23_w1, 8));
1056 /* Move ltypes to senddesc*_w1 */
1057 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1058 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
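/* Example (sketch): a packet carrying PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
 * PKT_TX_TCP_CKSUM indexes entry 13 of the table above and picks 0x13,
 * i.e. l4type = TCP checksum and l3type = IPv4 with checksum, which then
 * ends up in the ol3/ol4type fields of senddesc_w1.
 */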
1059 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1060 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1062 * Lookup table to translate ol_flags to
1066 const uint8x16_t tbl = {
1067 /* [0-15] = ol4type:ol3type */
1069 0x03, /* OUTER_IP_CKSUM */
1070 0x02, /* OUTER_IPV4 */
1071 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1072 0x04, /* OUTER_IPV6 */
1073 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1074 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1075 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1078 0x00, /* OUTER_UDP_CKSUM */
1079 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1080 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1081 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1084 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1085 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1088 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1091 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1092 * OUTER_IPV4 | OUTER_IP_CKSUM
1096 /* Extract olflags to translate to iltypes */
1097 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1098 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1101 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1102 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1104 const uint8x16_t shuf_mask5 = {
1105 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1106 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1108 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1109 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1111 /* Extract outer ol flags only */
1112 const uint64x2_t o_cksum_mask = {
1117 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1118 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1120 /* Extract OUTER_UDP_CKSUM bit 41 and
1124 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1125 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1127 /* Shift oltype by 2 to start nibble from BIT(56)
1128 * instead of BIT(58)
1130 xtmp128 = vshrq_n_u8(xtmp128, 2);
1131 ytmp128 = vshrq_n_u8(ytmp128, 2);
1133 * E(48):L3_LEN(8):L2_LEN(z+7)
1134 * E(48):L3_LEN(8):L2_LEN(z+7)
1136 const int8x16_t tshft3 = {
1137 -1, 0, 8, 8, 8, 8, 8, 8,
1138 -1, 0, 8, 8, 8, 8, 8, 8,
1141 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1142 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1145 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1146 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1148 /* Pick only relevant fields i.e Bit 56:63 of oltype
1149 * and place it in ol3/ol4type of senddesc_w1
1151 const uint8x16_t shuf_mask0 = {
1152 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1153 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1156 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1157 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1159 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1160 * a [E(32):E(16):OL3(8):OL2(8)]
1162 * a [E(32):E(16):(OL3+OL2):OL2]
1163 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1165 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1166 vshlq_n_u16(senddesc01_w1, 8));
1167 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1168 vshlq_n_u16(senddesc23_w1, 8));
1170 /* Move ltypes to senddesc*_w1 */
1171 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1172 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1173 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1174 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1175 /* Lookup table to translate ol_flags to
1176 * ol4type, ol3type, il4type, il3type of senddesc_w1
1178 const uint8x16x2_t tbl = {{
1180 /* [0-15] = il4type:il3type */
1181 0x04, /* none (IPv6) */
1182 0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1183 0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1184 0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1185 0x03, /* PKT_TX_IP_CKSUM */
1186 0x13, /* PKT_TX_IP_CKSUM |
1189 0x23, /* PKT_TX_IP_CKSUM |
1192 0x33, /* PKT_TX_IP_CKSUM |
1195 0x02, /* PKT_TX_IPV4 */
1196 0x12, /* PKT_TX_IPV4 |
1199 0x22, /* PKT_TX_IPV4 |
1202 0x32, /* PKT_TX_IPV4 |
1205 0x03, /* PKT_TX_IPV4 |
1208 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1211 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1214 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1220 /* [16-31] = ol4type:ol3type */
1222 0x03, /* OUTER_IP_CKSUM */
1223 0x02, /* OUTER_IPV4 */
1224 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1225 0x04, /* OUTER_IPV6 */
1226 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1227 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1228 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1231 0x00, /* OUTER_UDP_CKSUM */
1232 0x33, /* OUTER_UDP_CKSUM |
1235 0x32, /* OUTER_UDP_CKSUM |
1238 0x33, /* OUTER_UDP_CKSUM |
1239 * OUTER_IPV4 | OUTER_IP_CKSUM
1241 0x34, /* OUTER_UDP_CKSUM |
1244 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1247 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1250 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1251 * OUTER_IPV4 | OUTER_IP_CKSUM
1256 /* Extract olflags to translate to oltype & iltype */
1257 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1258 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1261 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1262 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1264 const uint32x4_t tshft_4 = {
1270 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1271 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1274 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1275 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1277 const uint8x16_t shuf_mask5 = {
1278 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1279 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1281 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1282 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1284 /* Extract outer and inner header ol_flags */
1285 const uint64x2_t oi_cksum_mask = {
1290 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1291 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1293 /* Extract OUTER_UDP_CKSUM bit 41 and
1297 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1298 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1300 /* Shift right oltype by 2 and iltype by 4
1301 * to start oltype nibble from BIT(58)
1302 * instead of BIT(56) and iltype nibble from BIT(48)
1303 * instead of BIT(52).
1305 const int8x16_t tshft5 = {
1306 8, 8, 8, 8, 8, 8, -4, -2,
1307 8, 8, 8, 8, 8, 8, -4, -2,
1310 xtmp128 = vshlq_u8(xtmp128, tshft5);
1311 ytmp128 = vshlq_u8(ytmp128, tshft5);
1313 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1314 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1316 const int8x16_t tshft3 = {
1317 -1, 0, -1, 0, 0, 0, 0, 0,
1318 -1, 0, -1, 0, 0, 0, 0, 0,
1321 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1322 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1324 /* Mark Bit(4) of oltype */
1325 const uint64x2_t oi_cksum_mask2 = {
1330 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1331 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1334 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1335 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1337 /* Pick only relevant fields i.e Bit 48:55 of iltype and
1338 * Bit 56:63 of oltype and place it in corresponding
1339 * place in senddesc_w1.
1341 const uint8x16_t shuf_mask0 = {
1342 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1343 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1346 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1347 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1349 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1350 * l3len, l2len, ol3len, ol2len.
1351 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1353 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1355 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1356 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1358 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1359 vshlq_n_u32(senddesc01_w1, 8));
1360 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1361 vshlq_n_u32(senddesc23_w1, 8));
1363 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1364 senddesc01_w1 = vaddq_u8(
1365 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1366 senddesc23_w1 = vaddq_u8(
1367 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
1369 /* Move ltypes to senddesc*_w1 */
1370 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1371 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1374 xmask01 = vdupq_n_u64(0);
1376 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1381 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1386 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1391 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1395 xmask01 = vshlq_n_u64(xmask01, 20);
1396 xmask23 = vshlq_n_u64(xmask23, 20);
1398 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1399 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1401 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1402 /* Tx ol_flag for vlan. */
1403 const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
1404 /* Bit enable for VLAN1 */
1405 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1406 /* Tx ol_flag for QnQ. */
1407 const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
1408 /* Bit enable for VLAN0 */
1409 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1410 /* Load vlan values from packet. outer is VLAN 0 */
1411 uint64x2_t ext01 = {
1412 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1413 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1414 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1415 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1417 uint64x2_t ext23 = {
1418 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1419 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1420 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1421 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1424 /* Get ol_flags of the packets. */
1425 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1426 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1428 /* ORR vlan outer/inner values into cmd. */
1429 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1430 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1432 /* Test for offload enable bits and generate masks. */
1433 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1435 vandq_u64(vtstq_u64(xtmp128, olq),
1437 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1439 vandq_u64(vtstq_u64(ytmp128, olq),
1442 /* Set vlan enable bits into cmd based on mask. */
1443 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1444 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1447 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1448 /* Tx ol_flag for timestamp. */
1449 const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
1450 PKT_TX_IEEE1588_TMST};
1451 /* Set send mem alg to SUB. */
1452 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1453 /* Increment send mem address by 8. */
1454 const uint64x2_t addr = {0x8, 0x8};
1456 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1457 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1459 /* Check if timestamp is requested and generate inverted
1460 * mask as we need not make any changes to default cmd
1463 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1464 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1466 /* Change send mem address to an 8 byte offset when
1467 * TSTMP is disabled.
1469 sendmem01_w1 = vaddq_u64(sendmem01_w1,
1470 vandq_u64(xtmp128, addr));
1471 sendmem23_w1 = vaddq_u64(sendmem23_w1,
1472 vandq_u64(ytmp128, addr));
1473 /* Change send mem alg to SUB when TSTMP is disabled. */
1474 sendmem01_w0 = vorrq_u64(sendmem01_w0,
1475 vandq_u64(xtmp128, alg));
1476 sendmem23_w0 = vorrq_u64(sendmem23_w0,
1477 vandq_u64(ytmp128, alg));
1479 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1480 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1481 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1482 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
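/* Net effect (sketch): for lanes whose mbuf did not request
 * PKT_TX_IEEE1588_TMST, the inverted mask adds 8 to the SEND_MEM address
 * and switches the alg to SUB, so the write lands in a scratch slot and
 * the previously captured Tx timestamp is left untouched.
 */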
1485 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1486 const uint64_t lso_fmt = txq->lso_tun_fmt;
1487 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1488 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1490 /* Extract SD W1 as we need to set L4 types. */
1491 vst1q_u64(sd_w1, senddesc01_w1);
1492 vst1q_u64(sd_w1 + 2, senddesc23_w1);
1494 /* Extract SX W0 as we need to set LSO fields. */
1495 vst1q_u64(sx_w0, sendext01_w0);
1496 vst1q_u64(sx_w0 + 2, sendext23_w0);
1498 /* Extract ol_flags. */
1499 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1500 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1502 /* Prepare individual mbufs. */
1503 cn10k_nix_prepare_tso(tx_pkts[0],
1504 (union nix_send_hdr_w1_u *)&sd_w1[0],
1505 (union nix_send_ext_w0_u *)&sx_w0[0],
1506 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
1508 cn10k_nix_prepare_tso(tx_pkts[1],
1509 (union nix_send_hdr_w1_u *)&sd_w1[1],
1510 (union nix_send_ext_w0_u *)&sx_w0[1],
1511 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
1513 cn10k_nix_prepare_tso(tx_pkts[2],
1514 (union nix_send_hdr_w1_u *)&sd_w1[2],
1515 (union nix_send_ext_w0_u *)&sx_w0[2],
1516 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
1518 cn10k_nix_prepare_tso(tx_pkts[3],
1519 (union nix_send_hdr_w1_u *)&sd_w1[3],
1520 (union nix_send_ext_w0_u *)&sx_w0[3],
1521 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
1523 senddesc01_w1 = vld1q_u64(sd_w1);
1524 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1526 sendext01_w0 = vld1q_u64(sx_w0);
1527 sendext23_w0 = vld1q_u64(sx_w0 + 2);
1530 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1531 /* Set don't free bit if reference count > 1 */
1532 xmask01 = vdupq_n_u64(0);
1535 /* Move mbufs to iova */
1536 mbuf0 = (uint64_t *)tx_pkts[0];
1537 mbuf1 = (uint64_t *)tx_pkts[1];
1538 mbuf2 = (uint64_t *)tx_pkts[2];
1539 mbuf3 = (uint64_t *)tx_pkts[3];
1541 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1542 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
1544 __mempool_check_cookies(
1545 ((struct rte_mbuf *)mbuf0)->pool,
1546 (void **)&mbuf0, 1, 0);
1548 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1549 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
1551 __mempool_check_cookies(
1552 ((struct rte_mbuf *)mbuf1)->pool,
1553 (void **)&mbuf1, 1, 0);
1555 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1556 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
1558 __mempool_check_cookies(
1559 ((struct rte_mbuf *)mbuf2)->pool,
1560 (void **)&mbuf2, 1, 0);
1562 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1563 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
1565 __mempool_check_cookies(
1566 ((struct rte_mbuf *)mbuf3)->pool,
1567 (void **)&mbuf3, 1, 0);
1568 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1569 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1571 /* Move mbufs to iova */
1572 mbuf0 = (uint64_t *)tx_pkts[0];
1573 mbuf1 = (uint64_t *)tx_pkts[1];
1574 mbuf2 = (uint64_t *)tx_pkts[2];
1575 mbuf3 = (uint64_t *)tx_pkts[3];
1577 /* Mark mempool object as "put" since
1578 * it is freed by NIX
1580 __mempool_check_cookies(
1581 ((struct rte_mbuf *)mbuf0)->pool,
1582 (void **)&mbuf0, 1, 0);
1584 __mempool_check_cookies(
1585 ((struct rte_mbuf *)mbuf1)->pool,
1586 (void **)&mbuf1, 1, 0);
1588 __mempool_check_cookies(
1589 ((struct rte_mbuf *)mbuf2)->pool,
1590 (void **)&mbuf2, 1, 0);
1592 __mempool_check_cookies(
1593 ((struct rte_mbuf *)mbuf3)->pool,
1594 (void **)&mbuf3, 1, 0);
1597 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1598 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1599 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1600 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1601 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1603 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1604 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1605 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1606 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1608 if (flags & NIX_TX_NEED_EXT_HDR) {
1609 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1610 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1611 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1612 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1615 if (flags & NIX_TX_NEED_EXT_HDR) {
1616 /* Store the prepared send desc to LMT lines */
1617 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1618 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
1619 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
1620 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
1621 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
1622 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
1623 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
1624 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
1625 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
1627 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
1628 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
1629 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
1630 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
1631 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
1632 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
1633 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
1634 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
1636 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
1637 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
1638 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
1639 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
1640 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
1641 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
1643 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
1644 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
1645 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
1646 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
1647 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
1648 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
1652 /* Store the prepared send desc to LMT lines */
1653 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
1654 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
1655 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
1656 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
1657 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
1658 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
1659 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
1660 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
1664 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1669 data = cn10k_nix_tx_steor_vec_data(flags);
1670 pa = io_addr | (data & 0x7) << 4;
1672 data |= (15ULL << 12);
1673 data |= (uint64_t)lmt_id;
1676 roc_lmt_submit_steorl(data, pa);
1678 data = cn10k_nix_tx_steor_vec_data(flags);
1679 pa = io_addr | (data & 0x7) << 4;
1681 data |= ((uint64_t)(lnum - 17)) << 12;
1682 data |= (uint64_t)(lmt_id + 16);
1685 roc_lmt_submit_steorl(data, pa);
1687 data = cn10k_nix_tx_steor_vec_data(flags);
1688 pa = io_addr | (data & 0x7) << 4;
1690 data |= ((uint64_t)(lnum - 1)) << 12;
1694 roc_lmt_submit_steorl(data, pa);
1702 if (unlikely(scalar))
1703 pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
1710 static __rte_always_inline uint16_t
1711 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1712 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1714 RTE_SET_USED(tx_queue);
1715 RTE_SET_USED(tx_pkts);
1718 RTE_SET_USED(flags);
1723 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
1724 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
1725 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
1726 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
1727 #define TSO_F NIX_TX_OFFLOAD_TSO_F
1728 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
1730 /* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
1731 #define NIX_TX_FASTPATH_MODES \
1732 T(no_offload, 0, 0, 0, 0, 0, 0, 4, \
1733 NIX_TX_OFFLOAD_NONE) \
1734 T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, \
1736 T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, \
1738 T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, \
1739 OL3OL4CSUM_F | L3L4CSUM_F) \
1740 T(vlan, 0, 0, 0, 1, 0, 0, 6, \
1742 T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, \
1743 VLAN_F | L3L4CSUM_F) \
1744 T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, \
1745 VLAN_F | OL3OL4CSUM_F) \
1746 T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6, \
1747 VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1748 T(noff, 0, 0, 1, 0, 0, 0, 4, \
1750 T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, \
1751 NOFF_F | L3L4CSUM_F) \
1752 T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, \
1753 NOFF_F | OL3OL4CSUM_F) \
1754 T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4, \
1755 NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1756 T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, \
1758 T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, \
1759 NOFF_F | VLAN_F | L3L4CSUM_F) \
1760 T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, \
1761 NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1762 T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6, \
1763 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1764 T(tso, 0, 1, 0, 0, 0, 0, 6, \
1766 T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, \
1767 TSO_F | L3L4CSUM_F) \
1768 T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, \
1769 TSO_F | OL3OL4CSUM_F) \
1770 T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6, \
1771 TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1772 T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, \
1774 T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, \
1775 TSO_F | VLAN_F | L3L4CSUM_F) \
1776 T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, \
1777 TSO_F | VLAN_F | OL3OL4CSUM_F) \
1778 T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6, \
1779 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1780 T(tso_noff, 0, 1, 1, 0, 0, 0, 6, \
1782 T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, \
1783 TSO_F | NOFF_F | L3L4CSUM_F) \
1784 T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, \
1785 TSO_F | NOFF_F | OL3OL4CSUM_F) \
1786 T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6, \
1787 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1788 T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, \
1789 TSO_F | NOFF_F | VLAN_F) \
1790 T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6, \
1791 TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1792 T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6, \
1793 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1794 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6, \
1795 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1796 T(ts, 1, 0, 0, 0, 0, 0, 8, \
1798 T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, \
1799 TSP_F | L3L4CSUM_F) \
1800 T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, \
1801 TSP_F | OL3OL4CSUM_F) \
1802 T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8, \
1803 TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1804 T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, \
1806 T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, \
1807 TSP_F | VLAN_F | L3L4CSUM_F) \
1808 T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, \
1809 TSP_F | VLAN_F | OL3OL4CSUM_F) \
1810 T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8, \
1811 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1812 T(ts_noff, 1, 0, 1, 0, 0, 0, 8, \
1814 T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, \
1815 TSP_F | NOFF_F | L3L4CSUM_F) \
1816 T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, \
1817 TSP_F | NOFF_F | OL3OL4CSUM_F) \
1818 T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8, \
1819 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1820 T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, \
1821 TSP_F | NOFF_F | VLAN_F) \
1822 T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8, \
1823 TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1824 T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8, \
1825 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1826 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8, \
1827 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1828 T(ts_tso, 1, 1, 0, 0, 0, 0, 8, \
1830 T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, \
1831 TSP_F | TSO_F | L3L4CSUM_F) \
1832 T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, \
1833 TSP_F | TSO_F | OL3OL4CSUM_F) \
1834 T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8, \
1835 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1836 T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, \
1837 TSP_F | TSO_F | VLAN_F) \
1838 T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8, \
1839 TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
1840 T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8, \
1841 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
1842 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8, \
1843 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1844 T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, \
1845 TSP_F | TSO_F | NOFF_F) \
1846 T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8, \
1847 TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
1848 T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8, \
1849 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
1850 T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8, \
1851 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1852 T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, \
1853 TSP_F | TSO_F | NOFF_F | VLAN_F) \
1854 T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8, \
1855 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1856 T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8, \
1857 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1858 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
1859 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1861 #define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
1862 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \
1863 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1865 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name( \
1866 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1868 uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \
1869 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
1871 NIX_TX_FASTPATH_MODES
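/* Illustration (sketch): for the "vlan" row of the table above, T()
 * expands into declarations of the form
 *   uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vlan(
 *           void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
 * plus the matching cn10k_nix_xmit_pkts_mseg_vlan and
 * cn10k_nix_xmit_pkts_vec_vlan variants; the definitions are expected to
 * be generated from the same table in the driver's fast-path sources.
 */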
1874 #endif /* __CN10K_TX_H__ */