1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
/* Per-queue Tx offload feature bits. The fast-path xmit functions are
 * template-expanded per combination of these flags at compile time, so
 * each bit both selects a function variant and gates code inside it.
 */
9 #define NIX_TX_OFFLOAD_NONE (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
17 /* Flags to control xmit_prepare function.
18 * Defining it from backwards to denote its been
19 * not used as offload flags to pick function
21 #define NIX_TX_MULTI_SEG_F BIT(15)
/* Offload combinations that require SEND_HDR_W1 (checksum/ptr word)
 * to be filled in by xmit_prepare.
 */
23 #define NIX_TX_NEED_SEND_HDR_W1 \
24 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
25 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
/* Offloads that need the NIX send ext sub-descriptor.
 * NOTE(review): the tail of this macro is elided in this extract.
 */
27 #define NIX_TX_NEED_EXT_HDR \
28 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
/* Flow-control gate: when the cached credit is too small, refresh it
 * from *fc_mem (free SQBs scaled to packets via sqes_per_sqb_log2) and
 * give up if there is still no room for (pkts).
 * NOTE(review): the do/while wrapper and the early-return statement of
 * this macro are on elided lines.
 */
31 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
33 /* Cached value is low, Update the fc_cache_pkts */ \
34 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
35 /* Multiply with sqe_per_sqb to express in pkts */ \
36 (txq)->fc_cache_pkts = \
37 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
38 << (txq)->sqes_per_sqb_log2; \
39 /* Check it again for the room */ \
40 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
45 /* Function to determine no of tx subdesc required in case ext
46 * sub desc is enabled.
/* Returns the sub-descriptor count for the compiled-in flag set:
 * timestamp needs the largest command, then VLAN/TSO (ext header),
 * else the minimum. NOTE(review): the ternary result values sit on
 * elided lines; only the conditions are visible here.
 */
48 static __rte_always_inline int
49 cn9k_nix_tx_ext_subs(const uint16_t flags)
51 return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
54 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
/* Pre-adjust packet header length fields for HW LSO (TSO): subtracts
 * the TCP payload length from the inner IP length field and, for
 * tunnel TSO, from the outer IP (and outer UDP) length fields too, so
 * every HW-generated segment carries self-consistent lengths.
 * No-op unless the TSO flag is compiled in and PKT_TX_TCP_SEG is set.
 * NOTE(review): several interior lines (closing braces, parts of the
 * tunnel branch) are elided in this extract.
 */
59 static __rte_always_inline void
60 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
62 uint64_t mask, ol_flags = m->ol_flags;
64 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
65 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
66 uint16_t *iplen, *oiplen, *oudplen;
67 uint16_t lso_sb, paylen;
/* mask is all-ones when outer (tunnel) headers are present, so
 * lso_sb (LSO start byte) includes outer_l2+outer_l3 only then.
 */
69 mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
70 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
71 m->l2_len + m->l3_len + m->l4_len;
73 /* Reduce payload len from base headers */
74 paylen = m->pkt_len - lso_sb;
76 /* Get iplen position assuming no tunnel hdr */
/* (2 << is_ipv6) = byte offset of the length field within the IP
 * header: 2 for IPv4 total-length, 4 for IPv6 payload-length.
 */
77 iplen = (uint16_t *)(mdata + m->l2_len +
78 (2 << !!(ol_flags & PKT_TX_IPV6)));
79 /* Handle tunnel tso */
80 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
81 (ol_flags & PKT_TX_TUNNEL_MASK)) {
/* Bitmap lookup keyed by tunnel type; >> 45 presumably aligns
 * PKT_TX_TUNNEL_MASK to bit 0 — confirm against rte_mbuf flags.
 */
82 const uint8_t is_udp_tun =
83 (CNXK_NIX_UDP_TUN_BITMASK >>
84 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
/* Shrink outer IP length field by the payload length. */
87 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
90 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
93 /* Update format for UDP tunneled packet */
/* Outer UDP length also excludes the TSO payload. */
95 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
97 *oudplen = rte_cpu_to_be_16(
98 rte_be_to_cpu_16(*oudplen) - paylen);
101 /* Update iplen position to inner ip hdr */
102 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
104 (2 << !!(ol_flags & PKT_TX_IPV6)));
/* Finally shrink the (inner) IP length field by the payload. */
107 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
/* Build the NIX send command for one packet into `cmd`: SEND_HDR
 * (W0/W1), optional SEND_EXT (VLAN/QinQ insertion, LSO setup) and the
 * SG entry for the single-segment case. `flags` is the compile-time
 * offload selection; `lso_tun_fmt` holds per-tunnel-type LSO format
 * indices (consumed only when TSO is enabled).
 * NOTE(review): many interior lines (else branches, closing braces,
 * barrier calls) are elided in this extract; comments below cover only
 * what is visible.
 */
111 static __rte_always_inline void
112 cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
113 const uint64_t lso_tun_fmt)
115 struct nix_send_ext_s *send_hdr_ext;
116 struct nix_send_hdr_s *send_hdr;
117 uint64_t ol_flags = 0, mask;
118 union nix_send_hdr_w1_u w1;
119 union nix_send_sg_s *sg;
/* Command layout: with ext header, SEND_EXT is cmd[2..3] and SG
 * starts at cmd[4]; otherwise SG starts at cmd[2].
 */
121 send_hdr = (struct nix_send_hdr_s *)cmd;
122 if (flags & NIX_TX_NEED_EXT_HDR) {
123 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
124 sg = (union nix_send_sg_s *)(cmd + 4);
125 /* Clear previous markings */
126 send_hdr_ext->w0.lso = 0;
127 send_hdr_ext->w1.u = 0;
129 sg = (union nix_send_sg_s *)(cmd + 2);
/* ol_flags only needed when some W1-consuming offload is on. */
132 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
133 ol_flags = m->ol_flags;
137 if (!(flags & NIX_TX_MULTI_SEG_F)) {
138 send_hdr->w0.total = m->data_len;
140 roc_npa_aura_handle_to_aura(m->pool->pool_id);
145 * 3 => IPV4 with csum
147 * L3type and L3ptr needs to be set for either
148 * L3 csum or L4 csum or LSO
/* Case 1: both outer and inner checksum offloads compiled in. */
152 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
153 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
154 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
155 const uint8_t ol3type =
156 ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
157 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
158 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
/* mask zeroes the OL3/OL4 pointers when there is no outer L3
 * type — presumably so non-tunnel packets leave them clear.
 */
161 w1.ol3type = ol3type;
162 mask = 0xffffull << ((!!ol3type) << 4);
163 w1.ol3ptr = ~mask & m->outer_l2_len;
164 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
/* csum=1 -> ol4type 3 (UDP csum); see table comment above. */
167 w1.ol4type = csum + (csum << 1);
170 w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
171 ((!!(ol_flags & PKT_TX_IPV6)) << 2);
172 w1.il3ptr = w1.ol4ptr + m->l2_len;
173 w1.il4ptr = w1.il3ptr + m->l3_len;
174 /* Increment it by 1 if it is IPV4 as 3 is with csum */
175 w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);
/* PKT_TX_L4_MASK occupies bits 52..53 of ol_flags. */
178 w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
180 /* In case of no tunnel header use only
181 * shift IL3/IL4 fields a bit to use
182 * OL3/OL4 for header checksum
185 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
186 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
/* Case 2: outer checksum offload only. */
188 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
189 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
190 const uint8_t outer_l2_len = m->outer_l2_len;
193 w1.ol3ptr = outer_l2_len;
194 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
195 /* Increment it by 1 if it is IPV4 as 3 is with csum */
196 w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
197 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
198 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
201 w1.ol4type = csum + (csum << 1);
/* Case 3: inner checksum only — OL fields carry the single
 * (inner) header since only one header level is in play.
 */
203 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
204 const uint8_t l2_len = m->l2_len;
206 /* Always use OLXPTR and OLXTYPE when only
207 * when one header is present
212 w1.ol4ptr = l2_len + m->l3_len;
213 /* Increment it by 1 if it is IPV4 as 3 is with csum */
214 w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
215 ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
216 !!(ol_flags & PKT_TX_IP_CKSUM);
219 w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
/* VLAN/QinQ insertion via the ext header: vlan1 = inner tag,
 * vlan0 = outer tag; both insert at byte offset 12.
 */
222 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
223 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
224 /* HW will update ptr after vlan0 update */
225 send_hdr_ext->w1.vlan1_ins_ptr = 12;
226 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
228 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
229 /* 2B before end of l2 header */
230 send_hdr_ext->w1.vlan0_ins_ptr = 12;
231 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
/* LSO setup: program the ext header from the checksum pointers
 * computed above (headers themselves were already adjusted in
 * cn9k_nix_xmit_prepare_tso()).
 */
234 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
/* mask selects ol4ptr (no inner L3 type) vs il4ptr as the
 * LSO start byte base.
 */
238 mask = -(!w1.il3type);
239 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
241 send_hdr_ext->w0.lso_sb = lso_sb;
242 send_hdr_ext->w0.lso = 1;
243 send_hdr_ext->w0.lso_mps = m->tso_segsz;
244 send_hdr_ext->w0.lso_format =
245 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
246 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
248 /* Handle tunnel tso */
249 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
250 (ol_flags & PKT_TX_TUNNEL_MASK)) {
251 const uint8_t is_udp_tun =
252 (CNXK_NIX_UDP_TUN_BITMASK >>
253 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
/* Select the 8-bit LSO format index for this tunnel /
 * inner/outer IP version combo out of lso_tun_fmt.
 */
255 uint8_t shift = is_udp_tun ? 32 : 0;
257 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
258 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
260 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
261 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
262 /* Update format for UDP tunneled packet */
263 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
/* Commit the assembled W1 word only when some offload used it. */
267 if (flags & NIX_TX_NEED_SEND_HDR_W1)
268 send_hdr->w1.u = w1.u;
/* Single-segment SG entry: size + data iova. */
270 if (!(flags & NIX_TX_MULTI_SEG_F)) {
271 sg->seg1_size = m->data_len;
272 *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);
274 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
275 /* DF bit = 1 if refcount of current mbuf or parent mbuf
277 * DF bit = 0 otherwise
279 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
280 /* Ensuring mbuf fields which got updated in
281 * cnxk_nix_prefree_seg are written before LMTST.
285 /* Mark mempool object as "put" since it is freed by NIX */
286 if (!send_hdr->w0.df)
287 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
/* Patch the SEND_MEM sub-descriptor at the tail of the command for Tx
 * timestamping. Active only when the TSTAMP offload is compiled in;
 * `no_segdw` (descriptor size in dword pairs) locates the sub-desc.
 * For packets lacking PKT_TX_IEEE1588_TMST the alg and target address
 * are redirected so the real timestamp slot is not overwritten.
 */
291 static __rte_always_inline void
292 cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
293 const uint64_t ol_flags, const uint16_t no_segdw,
294 const uint16_t flags)
296 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
297 struct nix_send_mem_s *send_mem;
298 uint16_t off = (no_segdw - 1) << 1;
/* is_ol_tstamp == 1 means "do NOT record tstamp for this pkt" */
299 const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
301 send_mem = (struct nix_send_mem_s *)(cmd + off);
302 if (flags & NIX_TX_MULTI_SEG_F) {
303 /* Retrieving the default desc values */
304 cmd[off] = send_mem_desc[6];
306 /* Using compiler barier to avoid voilation of C
309 rte_compiler_barrier();
312 /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
313 * should not be recorded, hence changing the alg type to
314 * NIX_SENDMEMALG_SET and also changing send mem addr field to
315 * next 8 bytes as it corrpt the actual tx tstamp registered
318 send_mem->w0.cn9k.alg =
319 NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
321 send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
/* Copy one prepared command to the LMT line and submit it with LDEOR,
 * retrying until the LMTST is accepted (nonzero status).
 * NOTE(review): the `do {` opener and lmt_status declaration are on
 * elided lines.
 */
326 static __rte_always_inline void
327 cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
328 const uint32_t flags)
333 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
334 lmt_status = roc_lmt_submit_ldeor(io_addr);
335 } while (lmt_status == 0);
/* Stage the command into the LMT line without submitting it; pair with
 * cn9k_nix_xmit_submit_lmt*() to issue the doorbell.
 */
338 static __rte_always_inline void
339 cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
341 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
/* Submit a previously staged LMT line via LDEOR; returns the LMTST
 * status (0 means the store must be retried).
 */
344 static __rte_always_inline uint64_t
345 cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
347 return roc_lmt_submit_ldeor(io_addr);
/* Same as cn9k_nix_xmit_submit_lmt() but uses ldeorl — presumably the
 * release-ordered LDEOR variant; confirm against the ROC LMT API.
 */
350 static __rte_always_inline uint64_t
351 cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
353 return roc_lmt_submit_ldeorl(io_addr);
/* Build the scatter-gather list for a chained (multi-segment) mbuf:
 * walks the segment chain, packs per-segment sizes into the SG header
 * word (16 bits each) and appends each segment's iova, starting a new
 * SG sub-descriptor every three segments. Returns the total command
 * size in (16 B) dwords, which the caller passes to the LMT copy and
 * stores in sizem1.
 * NOTE(review): the loop construct, `off` computation and several
 * closing braces are on elided lines.
 */
356 static __rte_always_inline uint16_t
357 cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
359 struct nix_send_hdr_s *send_hdr;
360 union nix_send_sg_s *sg;
361 struct rte_mbuf *m_next;
362 uint64_t *slist, sg_u;
367 send_hdr = (struct nix_send_hdr_s *)cmd;
368 send_hdr->w0.total = m->pkt_len;
/* Aura tells HW which pool to return fast-freed buffers to. */
369 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
371 if (flags & NIX_TX_NEED_EXT_HDR)
376 sg = (union nix_send_sg_s *)&cmd[2 + off];
377 /* Clear sg->u header before use */
378 sg->u &= 0xFC00000000000000;
380 slist = &cmd[3 + off];
383 nb_segs = m->nb_segs;
385 /* Fill mbuf segments */
/* Segment i's length occupies bits [16*i, 16*i+15] of sg_u. */
388 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
389 *slist = rte_mbuf_data_iova(m);
390 /* Set invert df if buffer is not to be freed by H/W */
391 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
/* i55 bit = invert-DF for segment i (see SG layout). */
392 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
393 /* Commit changes to mbuf */
396 /* Mark mempool object as "put" since it is freed by NIX */
397 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
398 if (!(sg_u & (1ULL << (i + 55))))
399 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
/* One SG sub-desc holds up to 3 segments; start the next one. */
405 if (i > 2 && nb_segs) {
407 /* Next SG subdesc */
408 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
411 sg = (union nix_send_sg_s *)slist;
/* Dwords consumed by SG entries, rounded up to a pair... */
420 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
421 /* Roundup extra dwords to multiple of 2 */
422 segdw = (segdw >> 1) + (segdw & 0x1);
/* ...plus header/ext dwords and the optional tstamp sub-desc. */
424 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
425 send_hdr->w0.sizem1 = segdw - 1;
/* Stage a multi-segment command (segdw dwords) into the LMT line
 * without submitting it.
 */
430 static __rte_always_inline void
431 cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
433 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
/* Copy a multi-segment command (segdw dwords) to the LMT line and
 * submit via LDEOR, retrying until accepted.
 * NOTE(review): `do {` opener and lmt_status declaration are elided.
 */
436 static __rte_always_inline void
437 cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
443 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
444 lmt_status = roc_lmt_submit_ldeor(io_addr);
445 } while (lmt_status == 0);
/* Release-ordered variant of cn9k_nix_xmit_mseg_one(). NOTE(review):
 * in this extract the visible submit call is the plain
 * roc_lmt_submit_ldeor(); the ordering presumably comes from an
 * elided barrier before it — confirm against the full source.
 */
448 static __rte_always_inline void
449 cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
450 rte_iova_t io_addr, uint16_t segdw)
456 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
457 lmt_status = roc_lmt_submit_ldeor(io_addr);
458 } while (lmt_status == 0);
/* Scalar single-segment burst Tx: check flow-control credit, load the
 * per-queue default command words, do the TSO header fixups up front,
 * then per packet build the command and LMTST it out. Returns the
 * number of packets sent (return statement on an elided line).
 */
461 static __rte_always_inline uint16_t
462 cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
463 uint64_t *cmd, const uint16_t flags)
465 struct cn9k_eth_txq *txq = tx_queue;
466 const rte_iova_t io_addr = txq->io_addr;
467 void *lmt_addr = txq->lmt_addr;
468 uint64_t lso_tun_fmt;
/* May return early when the queue has no room for `pkts`. */
471 NIX_XMIT_FC_OR_RETURN(txq, pkts);
/* Seed `cmd` with the queue's default descriptor template. */
473 roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
475 /* Perform header writes before barrier for TSO */
476 if (flags & NIX_TX_OFFLOAD_TSO_F) {
477 lso_tun_fmt = txq->lso_tun_fmt;
479 for (i = 0; i < pkts; i++)
480 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
483 /* Lets commit any changes in the packet here as no further changes
484 * to the packet will be done unless no fast free is enabled.
486 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
/* no_segdw is fixed at 4 for the single-segment path. */
489 for (i = 0; i < pkts; i++) {
490 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
491 cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
492 tx_pkts[i]->ol_flags, 4, flags);
493 cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
496 /* Reduce the cached count */
497 txq->fc_cache_pkts -= pkts;
/* Multi-segment counterpart of cn9k_nix_xmit_pkts(): identical flow,
 * but builds a scatter-gather list per packet (variable segdw) and
 * submits with the segmented LMT copy. Returns the number of packets
 * sent (return statement on an elided line).
 */
502 static __rte_always_inline uint16_t
503 cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
504 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
506 struct cn9k_eth_txq *txq = tx_queue;
507 const rte_iova_t io_addr = txq->io_addr;
508 void *lmt_addr = txq->lmt_addr;
509 uint64_t lso_tun_fmt;
/* May return early when the queue has no room for `pkts`. */
513 NIX_XMIT_FC_OR_RETURN(txq, pkts);
/* Seed `cmd` with the queue's default descriptor template. */
515 roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
517 /* Perform header writes before barrier for TSO */
518 if (flags & NIX_TX_OFFLOAD_TSO_F) {
519 lso_tun_fmt = txq->lso_tun_fmt;
521 for (i = 0; i < pkts; i++)
522 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
525 /* Lets commit any changes in the packet here as no further changes
526 * to the packet will be done unless no fast free is enabled.
528 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
531 for (i = 0; i < pkts; i++) {
532 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
/* segdw = actual command size for this packet's SG chain. */
533 segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
534 cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
535 tx_pkts[i]->ol_flags, segdw,
537 cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
540 /* Reduce the cached count */
541 txq->fc_cache_pkts -= pkts;
546 #if defined(RTE_ARCH_ARM64)
/* Vector-path LSO setup: same logic as the TSO branch inside
 * cn9k_nix_xmit_prepare(), but operating on already-extracted W1/W0
 * unions (one lane of the NEON path). Early-out when PKT_TX_TCP_SEG
 * is not set. Unlike the scalar path it derives tunnel formats from
 * fixed offsets (+2/+6 and the OUTER_IPV6 adjust) instead of
 * lso_tun_fmt. NOTE(review): several interior lines are elided.
 */
548 static __rte_always_inline void
549 cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
550 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
556 if (!(ol_flags & PKT_TX_TCP_SEG))
/* LSO start byte: ol4ptr when no inner L3 type, else il4ptr. */
559 mask = -(!w1->il3type);
560 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
564 w0->lso_mps = m->tso_segsz;
565 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
566 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
568 /* Handle tunnel tso */
569 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
570 (ol_flags & PKT_TX_TUNNEL_MASK)) {
/* Bitmap lookup keyed by tunnel type (mask aligned by >> 45). */
571 const uint8_t is_udp_tun =
572 (CNXK_NIX_UDP_TUN_BITMASK >>
573 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
576 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
577 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
578 /* Update format for UDP tunneled packet */
579 w0->lso_format += is_udp_tun ? 2 : 6;
581 w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
/* Packets handled per iteration of the NEON burst loop below. */
585 #define NIX_DESCS_PER_LOOP 4
586 static __rte_always_inline uint16_t
587 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
588 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
590 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
591 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
592 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
593 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
594 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
595 uint64x2_t senddesc01_w0, senddesc23_w0;
596 uint64x2_t senddesc01_w1, senddesc23_w1;
597 uint64x2_t sendext01_w0, sendext23_w0;
598 uint64x2_t sendext01_w1, sendext23_w1;
599 uint64x2_t sendmem01_w0, sendmem23_w0;
600 uint64x2_t sendmem01_w1, sendmem23_w1;
601 uint64x2_t sgdesc01_w0, sgdesc23_w0;
602 uint64x2_t sgdesc01_w1, sgdesc23_w1;
603 struct cn9k_eth_txq *txq = tx_queue;
604 uint64_t *lmt_addr = txq->lmt_addr;
605 rte_iova_t io_addr = txq->io_addr;
606 uint64x2_t ltypes01, ltypes23;
607 uint64x2_t xtmp128, ytmp128;
608 uint64x2_t xmask01, xmask23;
609 uint64_t lmt_status, i;
612 NIX_XMIT_FC_OR_RETURN(txq, pkts);
614 pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
615 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
617 /* Reduce the cached count */
618 txq->fc_cache_pkts -= pkts;
620 /* Perform header writes before barrier for TSO */
621 if (flags & NIX_TX_OFFLOAD_TSO_F) {
622 for (i = 0; i < pkts; i++)
623 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
626 /* Lets commit any changes in the packet here as no further changes
627 * to the packet will be done unless no fast free is enabled.
629 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
632 senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
633 senddesc23_w0 = senddesc01_w0;
634 senddesc01_w1 = vdupq_n_u64(0);
635 senddesc23_w1 = senddesc01_w1;
637 /* Load command defaults into vector variables. */
638 if (flags & NIX_TX_NEED_EXT_HDR) {
639 sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
640 sendext23_w0 = sendext01_w0;
641 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
642 sendext23_w1 = sendext01_w1;
643 sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
644 sgdesc23_w0 = sgdesc01_w0;
645 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
646 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
647 sendmem23_w0 = sendmem01_w0;
648 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
649 sendmem23_w1 = sendmem01_w1;
652 sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
653 sgdesc23_w0 = sgdesc01_w0;
656 for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
657 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
659 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
660 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
662 senddesc23_w0 = senddesc01_w0;
663 sgdesc23_w0 = sgdesc01_w0;
665 /* Clear vlan enables. */
666 if (flags & NIX_TX_NEED_EXT_HDR) {
667 sendext01_w1 = vbicq_u64(sendext01_w1,
668 vdupq_n_u64(0x3FFFF00FFFF00));
669 sendext23_w1 = sendext01_w1;
672 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
673 /* Reset send mem alg to SETTSTMP from SUB*/
674 sendmem01_w0 = vbicq_u64(sendmem01_w0,
675 vdupq_n_u64(BIT_ULL(59)));
676 /* Reset send mem address to default. */
678 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
679 sendmem23_w0 = sendmem01_w0;
680 sendmem23_w1 = sendmem01_w1;
683 if (flags & NIX_TX_OFFLOAD_TSO_F) {
684 /* Clear the LSO enable bit. */
685 sendext01_w0 = vbicq_u64(sendext01_w0,
686 vdupq_n_u64(BIT_ULL(14)));
687 sendext23_w0 = sendext01_w0;
690 /* Move mbufs to iova */
691 mbuf0 = (uint64_t *)tx_pkts[0];
692 mbuf1 = (uint64_t *)tx_pkts[1];
693 mbuf2 = (uint64_t *)tx_pkts[2];
694 mbuf3 = (uint64_t *)tx_pkts[3];
696 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
697 offsetof(struct rte_mbuf, buf_iova));
698 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
699 offsetof(struct rte_mbuf, buf_iova));
700 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
701 offsetof(struct rte_mbuf, buf_iova));
702 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
703 offsetof(struct rte_mbuf, buf_iova));
705 * Get mbuf's, olflags, iova, pktlen, dataoff
706 * dataoff_iovaX.D[0] = iova,
707 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
708 * len_olflagsX.D[0] = ol_flags,
709 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
711 dataoff_iova0 = vld1q_u64(mbuf0);
712 len_olflags0 = vld1q_u64(mbuf0 + 2);
713 dataoff_iova1 = vld1q_u64(mbuf1);
714 len_olflags1 = vld1q_u64(mbuf1 + 2);
715 dataoff_iova2 = vld1q_u64(mbuf2);
716 len_olflags2 = vld1q_u64(mbuf2 + 2);
717 dataoff_iova3 = vld1q_u64(mbuf3);
718 len_olflags3 = vld1q_u64(mbuf3 + 2);
720 /* Move mbufs to point pool */
721 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
722 offsetof(struct rte_mbuf, pool) -
723 offsetof(struct rte_mbuf, buf_iova));
724 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
725 offsetof(struct rte_mbuf, pool) -
726 offsetof(struct rte_mbuf, buf_iova));
727 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
728 offsetof(struct rte_mbuf, pool) -
729 offsetof(struct rte_mbuf, buf_iova));
730 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
731 offsetof(struct rte_mbuf, pool) -
732 offsetof(struct rte_mbuf, buf_iova));
734 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
735 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
736 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
738 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
739 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
742 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
743 : [a] "+w"(senddesc01_w1)
744 : [in] "r"(mbuf0 + 2)
747 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
748 : [a] "+w"(senddesc01_w1)
749 : [in] "r"(mbuf1 + 2)
752 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
753 : [b] "+w"(senddesc23_w1)
754 : [in] "r"(mbuf2 + 2)
757 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
758 : [b] "+w"(senddesc23_w1)
759 : [in] "r"(mbuf3 + 2)
762 /* Get pool pointer alone */
763 mbuf0 = (uint64_t *)*mbuf0;
764 mbuf1 = (uint64_t *)*mbuf1;
765 mbuf2 = (uint64_t *)*mbuf2;
766 mbuf3 = (uint64_t *)*mbuf3;
768 /* Get pool pointer alone */
769 mbuf0 = (uint64_t *)*mbuf0;
770 mbuf1 = (uint64_t *)*mbuf1;
771 mbuf2 = (uint64_t *)*mbuf2;
772 mbuf3 = (uint64_t *)*mbuf3;
775 const uint8x16_t shuf_mask2 = {
776 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
777 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
779 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
780 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
782 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
783 const uint64x2_t and_mask0 = {
788 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
789 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
790 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
791 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
794 * Pick only 16 bits of pktlen preset at bits 63:32
795 * and place them at bits 15:0.
797 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
798 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
800 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
801 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
802 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
804 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
805 * pktlen at 15:0 position.
807 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
808 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
809 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
810 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
812 /* Move mbuf to point to pool_id. */
813 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
814 offsetof(struct rte_mempool, pool_id));
815 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
816 offsetof(struct rte_mempool, pool_id));
817 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
818 offsetof(struct rte_mempool, pool_id));
819 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
820 offsetof(struct rte_mempool, pool_id));
822 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
823 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
825 * Lookup table to translate ol_flags to
826 * il3/il4 types. But we still use ol3/ol4 types in
827 * senddesc_w1 as only one header processing is enabled.
829 const uint8x16_t tbl = {
830 /* [0-15] = il4type:il3type */
831 0x04, /* none (IPv6 assumed) */
832 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
833 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
834 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
835 0x03, /* PKT_TX_IP_CKSUM */
836 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
837 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
838 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
839 0x02, /* PKT_TX_IPV4 */
840 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
841 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
842 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
843 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
844 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
847 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
850 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
855 /* Extract olflags to translate to iltypes */
856 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
857 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
860 * E(47):L3_LEN(9):L2_LEN(7+z)
861 * E(47):L3_LEN(9):L2_LEN(7+z)
863 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
864 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
866 /* Move OLFLAGS bits 55:52 to 51:48
867 * with zeros preprended on the byte and rest
870 xtmp128 = vshrq_n_u8(xtmp128, 4);
871 ytmp128 = vshrq_n_u8(ytmp128, 4);
873 * E(48):L3_LEN(8):L2_LEN(z+7)
874 * E(48):L3_LEN(8):L2_LEN(z+7)
876 const int8x16_t tshft3 = {
877 -1, 0, 8, 8, 8, 8, 8, 8,
878 -1, 0, 8, 8, 8, 8, 8, 8,
881 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
882 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
885 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
886 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
888 /* Pick only relevant fields i.e Bit 48:55 of iltype
889 * and place it in ol3/ol4type of senddesc_w1
891 const uint8x16_t shuf_mask0 = {
892 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
893 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
896 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
897 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
899 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
900 * a [E(32):E(16):OL3(8):OL2(8)]
902 * a [E(32):E(16):(OL3+OL2):OL2]
903 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
905 senddesc01_w1 = vaddq_u8(senddesc01_w1,
906 vshlq_n_u16(senddesc01_w1, 8));
907 senddesc23_w1 = vaddq_u8(senddesc23_w1,
908 vshlq_n_u16(senddesc23_w1, 8));
910 /* Move ltypes to senddesc*_w1 */
911 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
912 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
913 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
914 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
916 * Lookup table to translate ol_flags to
920 const uint8x16_t tbl = {
921 /* [0-15] = ol4type:ol3type */
923 0x03, /* OUTER_IP_CKSUM */
924 0x02, /* OUTER_IPV4 */
925 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
926 0x04, /* OUTER_IPV6 */
927 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
928 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
929 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
932 0x00, /* OUTER_UDP_CKSUM */
933 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
934 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
935 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
938 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
939 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
942 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
945 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
946 * OUTER_IPV4 | OUTER_IP_CKSUM
950 /* Extract olflags to translate to iltypes */
951 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
952 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
955 * E(47):OL3_LEN(9):OL2_LEN(7+z)
956 * E(47):OL3_LEN(9):OL2_LEN(7+z)
958 const uint8x16_t shuf_mask5 = {
959 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
960 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
962 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
963 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
965 /* Extract outer ol flags only */
966 const uint64x2_t o_cksum_mask = {
971 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
972 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
974 /* Extract OUTER_UDP_CKSUM bit 41 and
978 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
979 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
981 /* Shift oltype by 2 to start nibble from BIT(56)
984 xtmp128 = vshrq_n_u8(xtmp128, 2);
985 ytmp128 = vshrq_n_u8(ytmp128, 2);
987 * E(48):L3_LEN(8):L2_LEN(z+7)
988 * E(48):L3_LEN(8):L2_LEN(z+7)
990 const int8x16_t tshft3 = {
991 -1, 0, 8, 8, 8, 8, 8, 8,
992 -1, 0, 8, 8, 8, 8, 8, 8,
995 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
996 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
999 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1000 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1002 /* Pick only relevant fields i.e Bit 56:63 of oltype
1003 * and place it in ol3/ol4type of senddesc_w1
1005 const uint8x16_t shuf_mask0 = {
1006 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1007 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1010 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1011 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1013 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1014 * a [E(32):E(16):OL3(8):OL2(8)]
1016 * a [E(32):E(16):(OL3+OL2):OL2]
1017 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1019 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1020 vshlq_n_u16(senddesc01_w1, 8));
1021 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1022 vshlq_n_u16(senddesc23_w1, 8));
1024 /* Move ltypes to senddesc*_w1 */
1025 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1026 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1027 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1028 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1029 /* Lookup table to translate ol_flags to
1030 * ol4type, ol3type, il4type, il3type of senddesc_w1
1032 const uint8x16x2_t tbl = {{
1034 /* [0-15] = il4type:il3type */
1035 0x04, /* none (IPv6) */
1036 0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1037 0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1038 0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1039 0x03, /* PKT_TX_IP_CKSUM */
1040 0x13, /* PKT_TX_IP_CKSUM |
1043 0x23, /* PKT_TX_IP_CKSUM |
1046 0x33, /* PKT_TX_IP_CKSUM |
1049 0x02, /* PKT_TX_IPV4 */
1050 0x12, /* PKT_TX_IPV4 |
1053 0x22, /* PKT_TX_IPV4 |
1056 0x32, /* PKT_TX_IPV4 |
1059 0x03, /* PKT_TX_IPV4 |
1062 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1065 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1068 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1074 /* [16-31] = ol4type:ol3type */
1076 0x03, /* OUTER_IP_CKSUM */
1077 0x02, /* OUTER_IPV4 */
1078 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1079 0x04, /* OUTER_IPV6 */
1080 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1081 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1082 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1085 0x00, /* OUTER_UDP_CKSUM */
1086 0x33, /* OUTER_UDP_CKSUM |
1089 0x32, /* OUTER_UDP_CKSUM |
1092 0x33, /* OUTER_UDP_CKSUM |
1093 * OUTER_IPV4 | OUTER_IP_CKSUM
1095 0x34, /* OUTER_UDP_CKSUM |
1098 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1101 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1104 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1105 * OUTER_IPV4 | OUTER_IP_CKSUM
1110 /* Extract olflags to translate to oltype & iltype */
1111 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1112 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1115 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1116 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1118 const uint32x4_t tshft_4 = {
1124 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1125 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1128 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1129 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1131 const uint8x16_t shuf_mask5 = {
1132 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1133 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1135 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1136 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1138 /* Extract outer and inner header ol_flags */
1139 const uint64x2_t oi_cksum_mask = {
1144 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1145 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1147 /* Extract OUTER_UDP_CKSUM bit 41 and
1151 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1152 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1154 /* Shift right oltype by 2 and iltype by 4
1155 * to start oltype nibble from BIT(58)
1156 * instead of BIT(56) and iltype nibble from BIT(48)
1157 * instead of BIT(52).
1159 const int8x16_t tshft5 = {
1160 8, 8, 8, 8, 8, 8, -4, -2,
1161 8, 8, 8, 8, 8, 8, -4, -2,
1164 xtmp128 = vshlq_u8(xtmp128, tshft5);
1165 ytmp128 = vshlq_u8(ytmp128, tshft5);
1167 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1168 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1170 const int8x16_t tshft3 = {
1171 -1, 0, -1, 0, 0, 0, 0, 0,
1172 -1, 0, -1, 0, 0, 0, 0, 0,
1175 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1176 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1178 /* Mark Bit(4) of oltype */
1179 const uint64x2_t oi_cksum_mask2 = {
1184 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1185 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1188 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1189 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1191 /* Pick only relevant fields i.e Bit 48:55 of iltype and
1192 * Bit 56:63 of oltype and place it in corresponding
1193 * place in senddesc_w1.
1195 const uint8x16_t shuf_mask0 = {
1196 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1197 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1200 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1201 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1203 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1204 * l3len, l2len, ol3len, ol2len.
1205 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1207 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1209 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1210 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1212 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1213 vshlq_n_u32(senddesc01_w1, 8));
1214 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1215 vshlq_n_u32(senddesc23_w1, 8));
1217 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1218 senddesc01_w1 = vaddq_u8(
1219 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1220 senddesc23_w1 = vaddq_u8(
1221 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
1223 /* Move ltypes to senddesc*_w1 */
1224 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1225 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1228 xmask01 = vdupq_n_u64(0);
1230 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1235 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1240 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1245 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1249 xmask01 = vshlq_n_u64(xmask01, 20);
1250 xmask23 = vshlq_n_u64(xmask23, 20);
1252 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1253 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1255 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1256 /* Tx ol_flag for vlan. */
1257 const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
1258 /* Bit enable for VLAN1 */
1259 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1260 /* Tx ol_flag for QnQ. */
1261 const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
1262 /* Bit enable for VLAN0 */
1263 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1264 /* Load vlan values from packet. outer is VLAN 0 */
1265 uint64x2_t ext01 = {
1266 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1267 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1268 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1269 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1271 uint64x2_t ext23 = {
1272 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1273 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1274 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1275 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1278 /* Get ol_flags of the packets. */
1279 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1280 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1282 /* ORR vlan outer/inner values into cmd. */
1283 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1284 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1286 /* Test for offload enable bits and generate masks. */
1287 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1289 vandq_u64(vtstq_u64(xtmp128, olq),
1291 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1293 vandq_u64(vtstq_u64(ytmp128, olq),
1296 /* Set vlan enable bits into cmd based on mask. */
1297 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1298 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1301 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1302 /* Tx ol_flag for timestamp. */
1303 const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
1304 PKT_TX_IEEE1588_TMST};
1305 /* Set send mem alg to SUB. */
1306 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1307 /* Increment send mem address by 8. */
1308 const uint64x2_t addr = {0x8, 0x8};
1310 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1311 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1313 /* Check if timestamp is requested and generate inverted
1314 * mask as we need not make any changes to default cmd
1317 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1318 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1320 /* Change send mem address to an 8 byte offset when
1321 * TSTMP is disabled.
1323 sendmem01_w1 = vaddq_u64(sendmem01_w1,
1324 vandq_u64(xtmp128, addr));
1325 sendmem23_w1 = vaddq_u64(sendmem23_w1,
1326 vandq_u64(ytmp128, addr));
1327 /* Change send mem alg to SUB when TSTMP is disabled. */
1328 sendmem01_w0 = vorrq_u64(sendmem01_w0,
1329 vandq_u64(xtmp128, alg));
1330 sendmem23_w0 = vorrq_u64(sendmem23_w0,
1331 vandq_u64(ytmp128, alg));
1333 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1334 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1335 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1336 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1339 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1340 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1341 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1343 /* Extract SD W1 as we need to set L4 types. */
1344 vst1q_u64(sd_w1, senddesc01_w1);
1345 vst1q_u64(sd_w1 + 2, senddesc23_w1);
1347 /* Extract SX W0 as we need to set LSO fields. */
1348 vst1q_u64(sx_w0, sendext01_w0);
1349 vst1q_u64(sx_w0 + 2, sendext23_w0);
1351 /* Extract ol_flags. */
1352 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1353 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1355 /* Prepare individual mbufs. */
1356 cn9k_nix_prepare_tso(tx_pkts[0],
1357 (union nix_send_hdr_w1_u *)&sd_w1[0],
1358 (union nix_send_ext_w0_u *)&sx_w0[0],
1359 vgetq_lane_u64(xtmp128, 0), flags);
1361 cn9k_nix_prepare_tso(tx_pkts[1],
1362 (union nix_send_hdr_w1_u *)&sd_w1[1],
1363 (union nix_send_ext_w0_u *)&sx_w0[1],
1364 vgetq_lane_u64(xtmp128, 1), flags);
1366 cn9k_nix_prepare_tso(tx_pkts[2],
1367 (union nix_send_hdr_w1_u *)&sd_w1[2],
1368 (union nix_send_ext_w0_u *)&sx_w0[2],
1369 vgetq_lane_u64(ytmp128, 0), flags);
1371 cn9k_nix_prepare_tso(tx_pkts[3],
1372 (union nix_send_hdr_w1_u *)&sd_w1[3],
1373 (union nix_send_ext_w0_u *)&sx_w0[3],
1374 vgetq_lane_u64(ytmp128, 1), flags);
1376 senddesc01_w1 = vld1q_u64(sd_w1);
1377 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1379 sendext01_w0 = vld1q_u64(sx_w0);
1380 sendext23_w0 = vld1q_u64(sx_w0 + 2);
1383 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1384 /* Set don't free bit if reference count > 1 */
1385 xmask01 = vdupq_n_u64(0);
1388 /* Move mbufs to iova */
1389 mbuf0 = (uint64_t *)tx_pkts[0];
1390 mbuf1 = (uint64_t *)tx_pkts[1];
1391 mbuf2 = (uint64_t *)tx_pkts[2];
1392 mbuf3 = (uint64_t *)tx_pkts[3];
1394 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1395 vsetq_lane_u64(0x80000, xmask01, 0);
1397 __mempool_check_cookies(
1398 ((struct rte_mbuf *)mbuf0)->pool,
1399 (void **)&mbuf0, 1, 0);
1401 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1402 vsetq_lane_u64(0x80000, xmask01, 1);
1404 __mempool_check_cookies(
1405 ((struct rte_mbuf *)mbuf1)->pool,
1406 (void **)&mbuf1, 1, 0);
1408 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1409 vsetq_lane_u64(0x80000, xmask23, 0);
1411 __mempool_check_cookies(
1412 ((struct rte_mbuf *)mbuf2)->pool,
1413 (void **)&mbuf2, 1, 0);
1415 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1416 vsetq_lane_u64(0x80000, xmask23, 1);
1418 __mempool_check_cookies(
1419 ((struct rte_mbuf *)mbuf3)->pool,
1420 (void **)&mbuf3, 1, 0);
1421 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1422 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1423 /* Ensuring mbuf fields which got updated in
1424 * cnxk_nix_prefree_seg are written before LMTST.
1428 /* Move mbufs to iova */
1429 mbuf0 = (uint64_t *)tx_pkts[0];
1430 mbuf1 = (uint64_t *)tx_pkts[1];
1431 mbuf2 = (uint64_t *)tx_pkts[2];
1432 mbuf3 = (uint64_t *)tx_pkts[3];
1434 /* Mark mempool object as "put" since
1435 * it is freed by NIX
1437 __mempool_check_cookies(
1438 ((struct rte_mbuf *)mbuf0)->pool,
1439 (void **)&mbuf0, 1, 0);
1441 __mempool_check_cookies(
1442 ((struct rte_mbuf *)mbuf1)->pool,
1443 (void **)&mbuf1, 1, 0);
1445 __mempool_check_cookies(
1446 ((struct rte_mbuf *)mbuf2)->pool,
1447 (void **)&mbuf2, 1, 0);
1449 __mempool_check_cookies(
1450 ((struct rte_mbuf *)mbuf3)->pool,
1451 (void **)&mbuf3, 1, 0);
1452 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1457 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1458 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1459 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1460 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1461 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1463 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1464 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1465 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1466 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1468 if (flags & NIX_TX_NEED_EXT_HDR) {
1469 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1470 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1471 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1472 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1475 if (flags & NIX_TX_NEED_EXT_HDR) {
1476 /* With ext header in the command we can no longer send
1477 * all 4 packets together since LMTLINE is 128bytes.
1478 * Split and Tx twice.
1481 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1482 vst1q_u64(lmt_addr, cmd0[0]);
1483 vst1q_u64(lmt_addr + 2, cmd2[0]);
1484 vst1q_u64(lmt_addr + 4, cmd1[0]);
1485 vst1q_u64(lmt_addr + 6, cmd3[0]);
1486 vst1q_u64(lmt_addr + 8, cmd0[1]);
1487 vst1q_u64(lmt_addr + 10, cmd2[1]);
1488 vst1q_u64(lmt_addr + 12, cmd1[1]);
1489 vst1q_u64(lmt_addr + 14, cmd3[1]);
1491 vst1q_u64(lmt_addr, cmd0[0]);
1492 vst1q_u64(lmt_addr + 2, cmd2[0]);
1493 vst1q_u64(lmt_addr + 4, cmd1[0]);
1494 vst1q_u64(lmt_addr + 6, cmd0[1]);
1495 vst1q_u64(lmt_addr + 8, cmd2[1]);
1496 vst1q_u64(lmt_addr + 10, cmd1[1]);
1498 lmt_status = roc_lmt_submit_ldeor(io_addr);
1499 } while (lmt_status == 0);
1502 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1503 vst1q_u64(lmt_addr, cmd0[2]);
1504 vst1q_u64(lmt_addr + 2, cmd2[2]);
1505 vst1q_u64(lmt_addr + 4, cmd1[2]);
1506 vst1q_u64(lmt_addr + 6, cmd3[2]);
1507 vst1q_u64(lmt_addr + 8, cmd0[3]);
1508 vst1q_u64(lmt_addr + 10, cmd2[3]);
1509 vst1q_u64(lmt_addr + 12, cmd1[3]);
1510 vst1q_u64(lmt_addr + 14, cmd3[3]);
1512 vst1q_u64(lmt_addr, cmd0[2]);
1513 vst1q_u64(lmt_addr + 2, cmd2[2]);
1514 vst1q_u64(lmt_addr + 4, cmd1[2]);
1515 vst1q_u64(lmt_addr + 6, cmd0[3]);
1516 vst1q_u64(lmt_addr + 8, cmd2[3]);
1517 vst1q_u64(lmt_addr + 10, cmd1[3]);
1519 lmt_status = roc_lmt_submit_ldeor(io_addr);
1520 } while (lmt_status == 0);
1523 vst1q_u64(lmt_addr, cmd0[0]);
1524 vst1q_u64(lmt_addr + 2, cmd1[0]);
1525 vst1q_u64(lmt_addr + 4, cmd0[1]);
1526 vst1q_u64(lmt_addr + 6, cmd1[1]);
1527 vst1q_u64(lmt_addr + 8, cmd0[2]);
1528 vst1q_u64(lmt_addr + 10, cmd1[2]);
1529 vst1q_u64(lmt_addr + 12, cmd0[3]);
1530 vst1q_u64(lmt_addr + 14, cmd1[3]);
1531 lmt_status = roc_lmt_submit_ldeor(io_addr);
1532 } while (lmt_status == 0);
1534 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1537 if (unlikely(pkts_left))
1538 pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
1545 static __rte_always_inline uint16_t
1546 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1547 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1549 RTE_SET_USED(tx_queue);
1550 RTE_SET_USED(tx_pkts);
1553 RTE_SET_USED(flags);
/* Short aliases for the NIX_TX_OFFLOAD_* flag bits, used to keep the
 * NIX_TX_FASTPATH_MODES table below readable. One alias per offload:
 * inner/outer L3+L4 checksum, VLAN/QinQ insertion, mbuf fast-free
 * (no-offload-free), TSO and Tx timestamping.
 */
1558 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
1559 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
1560 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
1561 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
1562 #define TSO_F NIX_TX_OFFLOAD_TSO_F
1563 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
1565 /* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
1566 #define NIX_TX_FASTPATH_MODES \
1567 T(no_offload, 0, 0, 0, 0, 0, 0, 4, \
1568 NIX_TX_OFFLOAD_NONE) \
1569 T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, \
1571 T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, \
1573 T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, \
1574 OL3OL4CSUM_F | L3L4CSUM_F) \
1575 T(vlan, 0, 0, 0, 1, 0, 0, 6, \
1577 T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, \
1578 VLAN_F | L3L4CSUM_F) \
1579 T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, \
1580 VLAN_F | OL3OL4CSUM_F) \
1581 T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6, \
1582 VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1583 T(noff, 0, 0, 1, 0, 0, 0, 4, \
1585 T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, \
1586 NOFF_F | L3L4CSUM_F) \
1587 T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, \
1588 NOFF_F | OL3OL4CSUM_F) \
1589 T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4, \
1590 NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1591 T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, \
1593 T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, \
1594 NOFF_F | VLAN_F | L3L4CSUM_F) \
1595 T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, \
1596 NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1597 T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6, \
1598 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1599 T(tso, 0, 1, 0, 0, 0, 0, 6, \
1601 T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, \
1602 TSO_F | L3L4CSUM_F) \
1603 T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, \
1604 TSO_F | OL3OL4CSUM_F) \
1605 T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6, \
1606 TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1607 T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, \
1609 T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, \
1610 TSO_F | VLAN_F | L3L4CSUM_F) \
1611 T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, \
1612 TSO_F | VLAN_F | OL3OL4CSUM_F) \
1613 T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6, \
1614 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1615 T(tso_noff, 0, 1, 1, 0, 0, 0, 6, \
1617 T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, \
1618 TSO_F | NOFF_F | L3L4CSUM_F) \
1619 T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, \
1620 TSO_F | NOFF_F | OL3OL4CSUM_F) \
1621 T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6, \
1622 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1623 T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, \
1624 TSO_F | NOFF_F | VLAN_F) \
1625 T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6, \
1626 TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1627 T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6, \
1628 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1629 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6, \
1630 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1631 T(ts, 1, 0, 0, 0, 0, 0, 8, \
1633 T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, \
1634 TSP_F | L3L4CSUM_F) \
1635 T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, \
1636 TSP_F | OL3OL4CSUM_F) \
1637 T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8, \
1638 TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1639 T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, \
1641 T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, \
1642 TSP_F | VLAN_F | L3L4CSUM_F) \
1643 T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, \
1644 TSP_F | VLAN_F | OL3OL4CSUM_F) \
1645 T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8, \
1646 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1647 T(ts_noff, 1, 0, 1, 0, 0, 0, 8, \
1649 T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, \
1650 TSP_F | NOFF_F | L3L4CSUM_F) \
1651 T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, \
1652 TSP_F | NOFF_F | OL3OL4CSUM_F) \
1653 T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8, \
1654 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1655 T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, \
1656 TSP_F | NOFF_F | VLAN_F) \
1657 T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8, \
1658 TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1659 T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8, \
1660 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1661 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8, \
1662 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1663 T(ts_tso, 1, 1, 0, 0, 0, 0, 8, \
1665 T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, \
1666 TSP_F | TSO_F | L3L4CSUM_F) \
1667 T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, \
1668 TSP_F | TSO_F | OL3OL4CSUM_F) \
1669 T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8, \
1670 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1671 T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, \
1672 TSP_F | TSO_F | VLAN_F) \
1673 T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8, \
1674 TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
1675 T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8, \
1676 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
1677 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8, \
1678 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1679 T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, \
1680 TSP_F | TSO_F | NOFF_F) \
1681 T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8, \
1682 TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
1683 T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8, \
1684 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
1685 T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8, \
1686 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1687 T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, \
1688 TSP_F | TSO_F | NOFF_F | VLAN_F) \
1689 T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8, \
1690 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1691 T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8, \
1692 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1693 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
1694 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1696 #define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
1697 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name( \
1698 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1700 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name( \
1701 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1703 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
1704 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
1706 NIX_TX_FASTPATH_MODES
1709 #endif /* __CN9K_TX_H__ */