/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef __CN9K_TX_H__
#define __CN9K_TX_H__

#include <rte_vect.h>
#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX	      (NIX_TX_OFFLOAD_SECURITY_F << 1)
/* Flags to control the xmit_prepare function.
 * Defined from the MSB end to denote that they are
 * not used as offload flags to pick the transmit function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1                                                \
    (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |             \
     NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
#define NIX_TX_NEED_EXT_HDR                                                    \
    (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                    \
     NIX_TX_OFFLOAD_TSO_F)
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
    do {                                                                       \
        /* Cached value is low, Update the fc_cache_pkts */                    \
        if (unlikely((txq)->fc_cache_pkts < (pkts))) {                         \
            /* Multiply with sqe_per_sqb to express in pkts */                 \
            (txq)->fc_cache_pkts =                                             \
                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)                      \
                << (txq)->sqes_per_sqb_log2;                                   \
            /* Check it again for the room */                                  \
            if (unlikely((txq)->fc_cache_pkts < (pkts)))                       \
                return 0;                                                      \
        }                                                                      \
    } while (0)
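/* Worked example of the refill above (illustrative values, not taken from
 * the driver, assuming fc_mem counts SQBs consumed by HW): with
 * nb_sqb_bufs_adj = 100, *fc_mem = 90 and sqes_per_sqb_log2 = 5, the
 * refreshed cache is (100 - 90) << 5 = 320 packets worth of room. Only if
 * this is still smaller than the requested burst does the macro return 0.
 */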
/* Function to determine the number of Tx subdescriptors required when the
 * extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn9k_nix_tx_ext_subs(const uint16_t flags)
{
    return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
               ? 2
               : ((flags &
                   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
                      ? 1
                      : 0);
}
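/* Illustrative mapping of the above (derived from the flag checks, not an
 * extra hardware rule): TSTAMP_F needs 2 extended subdescriptors
 * (SEND_EXT + SEND_MEM), VLAN_QINQ_F or TSO_F needs 1 (SEND_EXT only),
 * anything else needs 0, so the SG subdescriptor sits at cmd[2] or cmd[4]
 * as laid out in cn9k_nix_xmit_prepare() below.
 */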
static __rte_always_inline void
cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
    uint64_t mask, ol_flags = m->ol_flags;

    if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
        uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
        uint16_t *iplen, *oiplen, *oudplen;
        uint16_t lso_sb, paylen;

        mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
        lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                 m->l2_len + m->l3_len + m->l4_len;

        /* Reduce payload len from base headers */
        paylen = m->pkt_len - lso_sb;

        /* Get iplen position assuming no tunnel hdr */
        iplen = (uint16_t *)(mdata + m->l2_len +
                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
            const uint8_t is_udp_tun =
                (CNXK_NIX_UDP_TUN_BITMASK >>
                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                0x1;

            oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                  (2 << !!(ol_flags &
                                           RTE_MBUF_F_TX_OUTER_IPV6)));
            *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                       paylen);

            /* Update outer UDP length for UDP tunneled packet */
            if (is_udp_tun) {
                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                       m->outer_l3_len + 4);
                *oudplen = rte_cpu_to_be_16(
                    rte_be_to_cpu_16(*oudplen) - paylen);
            }

            /* Update iplen position to inner ip hdr */
            iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                 m->l4_len +
                                 (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
        }

        *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
    }
}
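/* Worked example of the math above (hypothetical plain TCPv4 TSO mbuf, no
 * tunnel): l2_len = 14, l3_len = 20, l4_len = 20 gives lso_sb = 54; with
 * pkt_len = 1514, paylen = 1460. The IPv4 total-length field at
 * mdata + 14 + 2 is then reduced by 1460, so the header template the HW
 * replicates per segment carries a per-segment length, not the full one.
 */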
static __rte_always_inline void
cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                      const uint64_t lso_tun_fmt)
{
    struct nix_send_ext_s *send_hdr_ext;
    struct nix_send_hdr_s *send_hdr;
    uint64_t ol_flags = 0, mask;
    union nix_send_hdr_w1_u w1;
    union nix_send_sg_s *sg;

    send_hdr = (struct nix_send_hdr_s *)cmd;
    if (flags & NIX_TX_NEED_EXT_HDR) {
        send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
        sg = (union nix_send_sg_s *)(cmd + 4);
        /* Clear previous markings */
        send_hdr_ext->w0.lso = 0;
        send_hdr_ext->w1.u = 0;
    } else {
        sg = (union nix_send_sg_s *)(cmd + 2);
    }

    if (flags & NIX_TX_NEED_SEND_HDR_W1) {
        ol_flags = m->ol_flags;
        w1.u = 0;
    }

    if (!(flags & NIX_TX_MULTI_SEG_F)) {
        send_hdr->w0.total = m->data_len;
        send_hdr->w0.aura =
            roc_npa_aura_handle_to_aura(m->pool->pool_id);
    }
    /*
     * L3type:  2 => IPV4
     *          3 => IPV4 with csum
     *          4 => IPV6
     * L3type and L3ptr need to be set for either
     * L3 csum or L4 csum or LSO
     */
    if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
        (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
        const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
        const uint8_t ol3type =
            ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
            ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
            !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

        /* Outer L3 */
        w1.ol3type = ol3type;
        mask = 0xffffull << ((!!ol3type) << 4);
        w1.ol3ptr = ~mask & m->outer_l2_len;
        w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

        /* Outer L4 */
        w1.ol4type = csum + (csum << 1);

        /* Inner L3 */
        w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
        w1.il3ptr = w1.ol4ptr + m->l2_len;
        w1.il4ptr = w1.il3ptr + m->l3_len;
        /* Add 1 for IPv4 with checksum, as type 3 is "IPv4 with csum" */
        w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

        /* Inner L4 */
        w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

        /* In case of no tunnel header, shift the IL3/IL4 fields
         * into OL3/OL4 so the single header's checksum is done
         * through OL3/OL4.
         */
        mask = !ol3type;
        w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
               ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
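        /* Example of the shift above (per the field layout implied here,
         * ptrs in the low 32 bits and types in the high 32 bits): with no
         * outer header, ol3type == 0 so mask == 1, moving the type half
         * right by 8 bits (IL3/IL4 type into OL3/OL4 type) and the ptr
         * half right by 16 bits (IL3/IL4 ptr into OL3/OL4 ptr). With an
         * outer header present mask == 0 and w1 is left untouched.
         */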
    } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
        const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
        const uint8_t outer_l2_len = m->outer_l2_len;

        /* Outer L3 */
        w1.ol3ptr = outer_l2_len;
        w1.ol4ptr = outer_l2_len + m->outer_l3_len;
        /* Add 1 for IPv4 with checksum, as type 3 is "IPv4 with csum" */
        w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

        /* Outer L4 */
        w1.ol4type = csum + (csum << 1);
    } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
        const uint8_t l2_len = m->l2_len;

        /* Always use OLXPTR and OLXTYPE when only
         * one header is present.
         */

        /* Outer L3 */
        w1.ol3ptr = l2_len;
        w1.ol4ptr = l2_len + m->l3_len;
        /* Add 1 for IPv4 with checksum, as type 3 is "IPv4 with csum" */
        w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

        /* Outer L4 */
        w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
    }
    if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
        send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
        /* HW will update ptr after vlan0 update */
        send_hdr_ext->w1.vlan1_ins_ptr = 12;
        send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

        send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
        /* 2B before end of l2 header */
        send_hdr_ext->w1.vlan0_ins_ptr = 12;
        send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
    }
    if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
        uint16_t lso_sb;
        uint64_t mask;

        mask = -(!w1.il3type);
        lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

        send_hdr_ext->w0.lso_sb = lso_sb;
        send_hdr_ext->w0.lso = 1;
        send_hdr_ext->w0.lso_mps = m->tso_segsz;
        send_hdr_ext->w0.lso_format =
            NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
        w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
            const uint8_t is_udp_tun =
                (CNXK_NIX_UDP_TUN_BITMASK >>
                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                0x1;
            uint8_t shift = is_udp_tun ? 32 : 0;

            shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
            shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

            w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
            w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
            /* Update format for UDP tunneled packet */
            send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
        }
    }
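    /* Example of the lso_tun_fmt selection above: for a UDP tunnel with
     * outer IPv6 and inner IPv4, shift = 32 + 16 + 0 = 48, i.e. byte 6 of
     * the per-queue lso_tun_fmt word becomes the LSO format index.
     */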
    if (flags & NIX_TX_NEED_SEND_HDR_W1)
        send_hdr->w1.u = w1.u;

    if (!(flags & NIX_TX_MULTI_SEG_F)) {
        sg->seg1_size = m->data_len;
        *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);

        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
            /* DF bit = 1 if refcount of current mbuf or parent mbuf
             *	is greater than 1
             * DF bit = 0 otherwise
             */
            send_hdr->w0.df = cnxk_nix_prefree_seg(m);
            /* Ensuring mbuf fields which got updated in
             * cnxk_nix_prefree_seg are written before LMTST.
             */
            rte_io_wmb();
        }
        /* Mark mempool object as "put" since it is freed by NIX */
        if (!send_hdr->w0.df)
            RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
    }
}
static __rte_always_inline void
cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
                             const uint64_t ol_flags, const uint16_t no_segdw,
                             const uint16_t flags)
{
    if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
        struct nix_send_mem_s *send_mem;
        uint16_t off = (no_segdw - 1) << 1;
        const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);

        send_mem = (struct nix_send_mem_s *)(cmd + off);
        if (flags & NIX_TX_MULTI_SEG_F) {
            /* Retrieving the default desc values */
            cmd[off] = send_mem_desc[6];

            /* Using compiler barrier to avoid violation of C
             * aliasing rules.
             */
            rte_compiler_barrier();
        }

        /* For packets without RTE_MBUF_F_TX_IEEE1588_TMST set, the Tx
         * timestamp must not be recorded, hence change the alg type to
         * NIX_SENDMEMALG_SET and move the send mem addr field to the
         * next 8 bytes so the registered Tx timestamp is not corrupted.
         */
        send_mem->w0.cn9k.alg =
            NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);

        send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
                                      (is_ol_tstamp));
    }
}
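/* Per the comment above: when RTE_MBUF_F_TX_IEEE1588_TMST is clear,
 * is_ol_tstamp == 1, so the alg drops from SETTSTMP to SET and the
 * destination address advances by one 64-bit word, turning the HW write
 * into a harmless scratch update instead of a timestamp capture.
 */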
static __rte_always_inline void
cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
                  const uint32_t flags)
{
    uint64_t lmt_status;

    do {
        roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
        lmt_status = roc_lmt_submit_ldeor(io_addr);
    } while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
{
    roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
{
    return roc_lmt_submit_ldeor(io_addr);
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
{
    return roc_lmt_submit_ldeorl(io_addr);
}
static __rte_always_inline uint16_t
cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
    struct nix_send_hdr_s *send_hdr;
    union nix_send_sg_s *sg;
    struct rte_mbuf *m_next;
    uint64_t *slist, sg_u;
    uint64_t nb_segs;
    uint64_t segdw;
    uint8_t off, i;

    send_hdr = (struct nix_send_hdr_s *)cmd;
    send_hdr->w0.total = m->pkt_len;
    send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

    if (flags & NIX_TX_NEED_EXT_HDR)
        off = 2;
    else
        off = 0;

    sg = (union nix_send_sg_s *)&cmd[2 + off];
    /* Clear sg->u header before use */
    sg->u &= 0xFC00000000000000;
    sg_u = sg->u;
    slist = &cmd[3 + off];

    i = 0;
    nb_segs = m->nb_segs;

    /* Fill mbuf segments */
    do {
        m_next = m->next;
        sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
        *slist = rte_mbuf_data_iova(m);
        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
            sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
            /* Commit changes to mbuf */
            rte_io_wmb();
        }
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << (i + 55))))
            RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        slist++;
        i++;
        nb_segs--;
        if (i > 2 && nb_segs) {
            i = 0;
            /* Next SG subdesc */
            *(uint64_t *)slist = sg_u & 0xFC00000000000000;
            sg->u = sg_u;
            sg->segs = 3;
            sg = (union nix_send_sg_s *)slist;
            sg_u = sg->u;
            slist++;
        }
        m = m_next;
    } while (nb_segs);

    sg->u = sg_u;
    sg->segs = i;
    segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
    /* Roundup extra dwords to multiple of 2 */
    segdw = (segdw >> 1) + (segdw & 0x1);
    /* Default dwords */
    segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
    send_hdr->w0.sizem1 = segdw - 1;

    return segdw;
}
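/* segdw example for the routine above (hypothetical 3-segment mbuf with the
 * extended header enabled, so off = 2): the SG area spans 1 + 3 = 4 dwords,
 * rounded up to 2 dword pairs; adding (off >> 1) + 1 = 2 default pairs
 * gives segdw = 4 and sizem1 = 3.
 */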
static __rte_always_inline void
cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
{
    roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
                       uint16_t segdw)
{
    uint64_t lmt_status;

    do {
        roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
        lmt_status = roc_lmt_submit_ldeor(io_addr);
    } while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
                               rte_iova_t io_addr, uint16_t segdw)
{
    uint64_t lmt_status;

    rte_io_wmb();
    do {
        roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
        lmt_status = roc_lmt_submit_ldeor(io_addr);
    } while (lmt_status == 0);
}
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                   uint64_t *cmd, const uint16_t flags)
{
    struct cn9k_eth_txq *txq = tx_queue;
    const rte_iova_t io_addr = txq->io_addr;
    void *lmt_addr = txq->lmt_addr;
    uint64_t lso_tun_fmt;
    uint16_t i;

    NIX_XMIT_FC_OR_RETURN(txq, pkts);

    roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));

    /* Perform header writes before barrier for TSO */
    if (flags & NIX_TX_OFFLOAD_TSO_F) {
        lso_tun_fmt = txq->lso_tun_fmt;

        for (i = 0; i < pkts; i++)
            cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
    }

    /* Commit any changes to the packets here, as no further changes
     * are made to them unless no-fast-free (MBUF_NOFF) is enabled.
     */
    if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
        rte_io_wmb();

    for (i = 0; i < pkts; i++) {
        cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
        cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
                                     tx_pkts[i]->ol_flags, 4, flags);
        cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
    }

    /* Reduce the cached count */
    txq->fc_cache_pkts -= pkts;

    return pkts;
}
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                        uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
    struct cn9k_eth_txq *txq = tx_queue;
    const rte_iova_t io_addr = txq->io_addr;
    void *lmt_addr = txq->lmt_addr;
    uint64_t lso_tun_fmt;
    uint16_t segdw;
    uint16_t i;

    NIX_XMIT_FC_OR_RETURN(txq, pkts);

    roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));

    /* Perform header writes before barrier for TSO */
    if (flags & NIX_TX_OFFLOAD_TSO_F) {
        lso_tun_fmt = txq->lso_tun_fmt;

        for (i = 0; i < pkts; i++)
            cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
    }

    /* Commit any changes to the packets here, as no further changes
     * are made to them unless no-fast-free (MBUF_NOFF) is enabled.
     */
    if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
        rte_io_wmb();

    for (i = 0; i < pkts; i++) {
        cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
        segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
        cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
                                     tx_pkts[i]->ol_flags, segdw,
                                     flags);
        cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
    }

    /* Reduce the cached count */
    txq->fc_cache_pkts -= pkts;

    return pkts;
}
#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                     union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                     uint64_t flags)
{
    uint16_t lso_sb;
    uint64_t mask;

    if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
        return;

    mask = -(!w1->il3type);
    lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

    w0->u |= BIT(14);
    w0->lso_sb = lso_sb;
    w0->lso_mps = m->tso_segsz;
    w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
    w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

    /* Handle tunnel tso */
    if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
        (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
        const uint8_t is_udp_tun =
            (CNXK_NIX_UDP_TUN_BITMASK >>
             ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
            0x1;

        w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
        w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
        /* Update format for UDP tunneled packet */
        w0->lso_format += is_udp_tun ? 2 : 6;

        w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
    }
}
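/* Example of the tunnel format arithmetic above (indices implied by the
 * +2/+6 and the OUTER_IPV6 step, assuming the LSO formats were registered
 * by the driver in that order): a VXLAN-style UDP tunnel over outer IPv4
 * adds 2 to the TSOV4/TSOV6 base, outer IPv6 adds another 2, while a
 * non-UDP (GRE-style) tunnel starts at base + 6 instead.
 */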
static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                               union nix_send_hdr_w0_u *sh,
                               union nix_send_sg_s *sg, const uint32_t flags)
{
    struct rte_mbuf *m_next;
    uint64_t *slist, sg_u;
    uint16_t nb_segs;
    uint64_t segdw;
    int i = 1;

    sh->total = m->pkt_len;
    /* Clear sg->u header before use */
    sg->u &= 0xFC00000000000000;
    sg_u = sg->u;
    slist = &cmd[0];

    sg_u = sg_u | ((uint64_t)m->data_len);

    nb_segs = m->nb_segs - 1;
    m_next = m->next;

    /* Set invert df if buffer is not to be freed by H/W */
    if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
        sg_u |= (cnxk_nix_prefree_seg(m) << 55);
    /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
    if (!(sg_u & (1ULL << 55)))
        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
    rte_io_wmb();
#endif

    m = m_next;
    /* Fill mbuf segments */
    do {
        m_next = m->next;
        sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
        *slist = rte_mbuf_data_iova(m);
        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
            sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
        /* Mark mempool object as "put" since it is freed by NIX
         */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << (i + 55))))
            RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        slist++;
        i++;
        nb_segs--;
        if (i > 2 && nb_segs) {
            i = 0;
            /* Next SG subdesc */
            *(uint64_t *)slist = sg_u & 0xFC00000000000000;
            sg->u = sg_u;
            sg->segs = 3;
            sg = (union nix_send_sg_s *)slist;
            sg_u = sg->u;
            slist++;
        }
        m = m_next;
    } while (nb_segs);

    sg->u = sg_u;
    sg->segs = i;
    segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];

    segdw += 2;
    /* Roundup extra dwords to multiple of 2 */
    segdw = (segdw >> 1) + (segdw & 0x1);
    /* Default dwords */
    segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
    sh->sizem1 = segdw - 1;

    return segdw;
}
static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                          uint64x2_t *cmd1, const uint32_t flags)
{
    union nix_send_hdr_w0_u sh;
    union nix_send_sg_s sg;
    uint8_t ret;

    if (m->nb_segs == 1) {
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
            sg.u = vgetq_lane_u64(cmd1[0], 0);
            sg.u |= (cnxk_nix_prefree_seg(m) << 55);
            cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
        }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        sg.u = vgetq_lane_u64(cmd1[0], 0);
        if (!(sg.u & (1ULL << 55)))
            RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
               !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
    }

    sh.u = vgetq_lane_u64(cmd0[0], 0);
    sg.u = vgetq_lane_u64(cmd1[0], 0);

    ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

    cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
    cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);

    return ret;
}
#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline void
cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
                               uint64x2_t *cmd2, uint64x2_t *cmd3,
                               uint8_t *segdw,
                               uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
                               uint64_t *lmt_addr, rte_iova_t io_addr,
                               const uint32_t flags)
{
    uint64_t lmt_status;
    uint8_t j, off;

    if (!(flags & NIX_TX_NEED_EXT_HDR) &&
        !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
        /* No segments in 4 consecutive packets. */
        if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
            do {
                vst1q_u64(lmt_addr, cmd0[0]);
                vst1q_u64(lmt_addr + 2, cmd1[0]);
                vst1q_u64(lmt_addr + 4, cmd0[1]);
                vst1q_u64(lmt_addr + 6, cmd1[1]);
                vst1q_u64(lmt_addr + 8, cmd0[2]);
                vst1q_u64(lmt_addr + 10, cmd1[2]);
                vst1q_u64(lmt_addr + 12, cmd0[3]);
                vst1q_u64(lmt_addr + 14, cmd1[3]);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
            } while (lmt_status == 0);

            return;
        }
    }
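    /* The <= 8 test above counts dword *pairs*: four 2-dword SEND_HDR plus
     * 2-dword SG descriptors are 16 dwords, i.e. 128 bytes, exactly one
     * LMTLINE, so all four packets can be flushed with a single LDEOR.
     */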
    for (j = 0; j < NIX_DESCS_PER_LOOP;) {
        /* Fit consecutive packets in same LMTLINE. */
        if ((segdw[j] + segdw[j + 1]) <= 8) {
            do {
                if ((flags & NIX_TX_NEED_EXT_HDR) &&
                    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                    vst1q_u64(lmt_addr, cmd0[j]);
                    vst1q_u64(lmt_addr + 2, cmd2[j]);
                    vst1q_u64(lmt_addr + 4, cmd1[j]);
                    /* Copy segs */
                    off = segdw[j] - 4;
                    roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                    off <<= 1;
                    vst1q_u64(lmt_addr + 6 + off, cmd3[j]);

                    vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
                    vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
                    vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
                    roc_lmt_mov_seg(lmt_addr + 14 + off,
                                    slist[j + 1], segdw[j + 1] - 4);
                    off += ((segdw[j + 1] - 4) << 1);
                    vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
                } else if (flags & NIX_TX_NEED_EXT_HDR) {
                    vst1q_u64(lmt_addr, cmd0[j]);
                    vst1q_u64(lmt_addr + 2, cmd2[j]);
                    vst1q_u64(lmt_addr + 4, cmd1[j]);
                    /* Copy segs */
                    off = segdw[j] - 3;
                    roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                    off <<= 1;
                    vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
                    vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
                    vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
                    roc_lmt_mov_seg(lmt_addr + 12 + off,
                                    slist[j + 1], segdw[j + 1] - 3);
                } else {
                    vst1q_u64(lmt_addr, cmd0[j]);
                    vst1q_u64(lmt_addr + 2, cmd1[j]);
                    /* Copy segs */
                    off = segdw[j] - 2;
                    roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
                    off <<= 1;
                    vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
                    vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
                    roc_lmt_mov_seg(lmt_addr + 8 + off,
                                    slist[j + 1], segdw[j + 1] - 2);
                }
                lmt_status = roc_lmt_submit_ldeor(io_addr);
            } while (lmt_status == 0);

            j += 2;
        } else {
            do {
                if ((flags & NIX_TX_NEED_EXT_HDR) &&
                    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                    vst1q_u64(lmt_addr, cmd0[j]);
                    vst1q_u64(lmt_addr + 2, cmd2[j]);
                    vst1q_u64(lmt_addr + 4, cmd1[j]);
                    /* Copy segs */
                    off = segdw[j] - 4;
                    roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                    off <<= 1;
                    vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
                } else if (flags & NIX_TX_NEED_EXT_HDR) {
                    vst1q_u64(lmt_addr, cmd0[j]);
                    vst1q_u64(lmt_addr + 2, cmd2[j]);
                    vst1q_u64(lmt_addr + 4, cmd1[j]);
                    /* Copy segs */
                    off = segdw[j] - 3;
                    roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                } else {
                    vst1q_u64(lmt_addr, cmd0[j]);
                    vst1q_u64(lmt_addr + 2, cmd1[j]);
                    /* Copy segs */
                    off = segdw[j] - 2;
                    roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
                }
                lmt_status = roc_lmt_submit_ldeor(io_addr);
            } while (lmt_status == 0);

            j += 1;
        }
    }
}
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                          uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
    uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
    uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
    uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
        cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
    uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
    uint64x2_t senddesc01_w0, senddesc23_w0;
    uint64x2_t senddesc01_w1, senddesc23_w1;
    uint64x2_t sendext01_w0, sendext23_w0;
    uint64x2_t sendext01_w1, sendext23_w1;
    uint64x2_t sendmem01_w0, sendmem23_w0;
    uint64x2_t sendmem01_w1, sendmem23_w1;
    uint64x2_t sgdesc01_w0, sgdesc23_w0;
    uint64x2_t sgdesc01_w1, sgdesc23_w1;
    struct cn9k_eth_txq *txq = tx_queue;
    uint64_t *lmt_addr = txq->lmt_addr;
    rte_iova_t io_addr = txq->io_addr;
    uint64x2_t ltypes01, ltypes23;
    uint64x2_t xtmp128, ytmp128;
    uint64x2_t xmask01, xmask23;
    uint64_t lmt_status, i;
    uint16_t pkts_left;

    NIX_XMIT_FC_OR_RETURN(txq, pkts);

    pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
    pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

    /* Reduce the cached count */
    txq->fc_cache_pkts -= pkts;

    /* Perform header writes before barrier for TSO */
    if (flags & NIX_TX_OFFLOAD_TSO_F) {
        for (i = 0; i < pkts; i++)
            cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
    }

    /* Commit any changes to the packets here, as no further changes
     * are made to them unless no-fast-free (MBUF_NOFF) is enabled.
     */
    if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
        rte_io_wmb();
    senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
    senddesc23_w0 = senddesc01_w0;
    senddesc01_w1 = vdupq_n_u64(0);
    senddesc23_w1 = senddesc01_w1;

    /* Load command defaults into vector variables. */
    if (flags & NIX_TX_NEED_EXT_HDR) {
        sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
        sendext23_w0 = sendext01_w0;
        sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
        sendext23_w1 = sendext01_w1;
        sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
        sgdesc23_w0 = sgdesc01_w0;
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
            sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
            sendmem23_w0 = sendmem01_w0;
            sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
            sendmem23_w1 = sendmem01_w1;
        }
    } else {
        sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
        sgdesc23_w0 = sgdesc01_w0;
    }
    for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
        /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
        senddesc01_w0 =
            vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
        sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

        senddesc23_w0 = senddesc01_w0;
        sgdesc23_w0 = sgdesc01_w0;

        /* Clear vlan enables. */
        if (flags & NIX_TX_NEED_EXT_HDR) {
            sendext01_w1 = vbicq_u64(sendext01_w1,
                                     vdupq_n_u64(0x3FFFF00FFFF00));
            sendext23_w1 = sendext01_w1;
        }

        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
            /* Reset send mem alg to SETTSTMP from SUB */
            sendmem01_w0 = vbicq_u64(sendmem01_w0,
                                     vdupq_n_u64(BIT_ULL(59)));
            /* Reset send mem address to default. */
            sendmem01_w1 =
                vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
            sendmem23_w0 = sendmem01_w0;
            sendmem23_w1 = sendmem01_w1;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F) {
            /* Clear the LSO enable bit. */
            sendext01_w0 = vbicq_u64(sendext01_w0,
                                     vdupq_n_u64(BIT_ULL(14)));
            sendext23_w0 = sendext01_w0;
        }
        /* Move mbufs to iova */
        mbuf0 = (uint64_t *)tx_pkts[0];
        mbuf1 = (uint64_t *)tx_pkts[1];
        mbuf2 = (uint64_t *)tx_pkts[2];
        mbuf3 = (uint64_t *)tx_pkts[3];

        mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                             offsetof(struct rte_mbuf, buf_iova));
        mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                             offsetof(struct rte_mbuf, buf_iova));
        mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                             offsetof(struct rte_mbuf, buf_iova));
        mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                             offsetof(struct rte_mbuf, buf_iova));
        /*
         * Get mbufs' olflags, iova, pktlen, dataoff:
         * dataoff_iovaX.D[0] = iova,
         * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
         * len_olflagsX.D[0] = ol_flags,
         * len_olflagsX.D[1](63:32) = mbuf->pkt_len
         */
        dataoff_iova0 = vld1q_u64(mbuf0);
        len_olflags0 = vld1q_u64(mbuf0 + 2);
        dataoff_iova1 = vld1q_u64(mbuf1);
        len_olflags1 = vld1q_u64(mbuf1 + 2);
        dataoff_iova2 = vld1q_u64(mbuf2);
        len_olflags2 = vld1q_u64(mbuf2 + 2);
        dataoff_iova3 = vld1q_u64(mbuf3);
        len_olflags3 = vld1q_u64(mbuf3 + 2);
        /* Move mbufs to point to pool */
        mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                             offsetof(struct rte_mbuf, pool) -
                             offsetof(struct rte_mbuf, buf_iova));
        mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                             offsetof(struct rte_mbuf, pool) -
                             offsetof(struct rte_mbuf, buf_iova));
        mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                             offsetof(struct rte_mbuf, pool) -
                             offsetof(struct rte_mbuf, buf_iova));
        mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                             offsetof(struct rte_mbuf, pool) -
                             offsetof(struct rte_mbuf, buf_iova));

        if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
                     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
            /* Get tx_offload for ol2, ol3, l2, l3 lengths */
            /*
             * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
             * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
             */
            asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
                         : [a] "+w"(senddesc01_w1)
                         : [in] "r"(mbuf0 + 2)
                         : "memory");

            asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
                         : [a] "+w"(senddesc01_w1)
                         : [in] "r"(mbuf1 + 2)
                         : "memory");

            asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
                         : [b] "+w"(senddesc23_w1)
                         : [in] "r"(mbuf2 + 2)
                         : "memory");

            asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
                         : [b] "+w"(senddesc23_w1)
                         : [in] "r"(mbuf3 + 2)
                         : "memory");

            /* Get pool pointer alone */
            mbuf0 = (uint64_t *)*mbuf0;
            mbuf1 = (uint64_t *)*mbuf1;
            mbuf2 = (uint64_t *)*mbuf2;
            mbuf3 = (uint64_t *)*mbuf3;
        } else {
            /* Get pool pointer alone */
            mbuf0 = (uint64_t *)*mbuf0;
            mbuf1 = (uint64_t *)*mbuf1;
            mbuf2 = (uint64_t *)*mbuf2;
            mbuf3 = (uint64_t *)*mbuf3;
        }
        const uint8x16_t shuf_mask2 = {
            0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
            0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        };
        xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
        ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

        /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
        const uint64x2_t and_mask0 = {
            0xFFFFFFFFFFFFFFFF,
            0x000000000000FFFF,
        };

        dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
        dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
        dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
        dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

        /*
         * Pick only 16 bits of pktlen preset at bits 63:32
         * and place them at bits 15:0.
         */
        xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
        ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

        /* Add pairwise to get dataoff + iova in sgdesc_w1 */
        sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
        sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

        /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
         * pktlen at 15:0 position.
         */
        sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
        sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
        senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
        senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
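        /* Example of the packing above: for a packet with pkt_len = 1500,
         * shuf_mask2 moves byte 4 (pkt_len bits 7:0) and byte 5 (bits
         * 15:8) of the zipped high dwords into bytes 0-1, so 1500 lands in
         * bits 15:0 and is OR-ed into both SEND_HDR_W0.total and the SG
         * first-segment size.
         */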
        /* Move mbuf to point to pool_id. */
        mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                             offsetof(struct rte_mempool, pool_id));
        mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                             offsetof(struct rte_mempool, pool_id));
        mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                             offsetof(struct rte_mempool, pool_id));
        mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                             offsetof(struct rte_mempool, pool_id));
        if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
            !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
            /*
             * Lookup table to translate ol_flags to
             * il3/il4 types. But we still use ol3/ol4 types in
             * senddesc_w1 as only one header processing is enabled.
             */
            const uint8x16_t tbl = {
                /* [0-15] = il4type:il3type */
                0x04, /* none (IPv6 assumed) */
                0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
                0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
                0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
                0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
                0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
                0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
                0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
                0x02, /* RTE_MBUF_F_TX_IPV4 */
                0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
                0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
                0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
                0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
                0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                       * RTE_MBUF_F_TX_TCP_CKSUM
                       */
                0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                       * RTE_MBUF_F_TX_SCTP_CKSUM
                       */
                0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                       * RTE_MBUF_F_TX_UDP_CKSUM
                       */
            };

            /* Extract olflags to translate to iltypes */
            xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
            ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

            /*
             * E(47):L3_LEN(9):L2_LEN(7+z)
             * E(47):L3_LEN(9):L2_LEN(7+z)
             */
            senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
            senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

            /* Move OLFLAGS bits 55:52 to 51:48
             * with zeros prepended on the byte and rest
             * of the bits cleared.
             */
            xtmp128 = vshrq_n_u8(xtmp128, 4);
            ytmp128 = vshrq_n_u8(ytmp128, 4);
            /*
             * E(48):L3_LEN(8):L2_LEN(z+7)
             * E(48):L3_LEN(8):L2_LEN(z+7)
             */
            const int8x16_t tshft3 = {
                -1, 0, 8, 8, 8, 8, 8, 8,
                -1, 0, 8, 8, 8, 8, 8, 8,
            };

            senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
            senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

            /* Do the lookup */
            ltypes01 = vqtbl1q_u8(tbl, xtmp128);
            ltypes23 = vqtbl1q_u8(tbl, ytmp128);

            /* Pick only relevant fields i.e. bits 48:55 of iltype
             * and place them in ol3/ol4type of senddesc_w1
             */
            const uint8x16_t shuf_mask0 = {
                0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
                0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
            };

            ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
            ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

            /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
             * a [E(32):E(16):OL3(8):OL2(8)]
             * a = a + (a << 8)
             * a [E(32):E(16):(OL3+OL2):OL2]
             * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
             */
            senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                     vshlq_n_u16(senddesc01_w1, 8));
            senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                     vshlq_n_u16(senddesc23_w1, 8));

            /* Move ltypes to senddesc*_w1 */
            senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
            senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
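            /* Worked instance of the pointer math above: with l2_len = 14
             * and l3_len = 20 packed as adjacent OL2/OL3 bytes,
             * a + (a << 8) yields OL3PTR = 14 and OL4PTR = 14 + 20 = 34,
             * matching the scalar w1.ol3ptr/w1.ol4ptr computation.
             */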
        } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
            /*
             * Lookup table to translate ol_flags to
             * ol3/ol4 types.
             */
            const uint8x16_t tbl = {
                /* [0-15] = ol4type:ol3type */
                0x00, /* none */
                0x03, /* OUTER_IP_CKSUM */
                0x02, /* OUTER_IPV4 */
                0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
                0x04, /* OUTER_IPV6 */
                0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
                0x00, /* OUTER_IPV6 | OUTER_IPV4 */
                0x00, /* OUTER_IPV6 | OUTER_IPV4 |
                       * OUTER_IP_CKSUM
                       */
                0x00, /* OUTER_UDP_CKSUM */
                0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
                0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
                0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
                       * OUTER_IP_CKSUM
                       */
                0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                       * OUTER_IP_CKSUM
                       */
                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                       * OUTER_IPV4
                       */
                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                       * OUTER_IPV4 | OUTER_IP_CKSUM
                       */
            };

            /* Extract olflags to translate to oltypes */
            xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
            ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

            /*
             * E(47):OL3_LEN(9):OL2_LEN(7+z)
             * E(47):OL3_LEN(9):OL2_LEN(7+z)
             */
            const uint8x16_t shuf_mask5 = {
                0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
            };
            senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
            senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

            /* Extract outer ol flags only */
            const uint64x2_t o_cksum_mask = {
                0x1C00020000000000,
                0x1C00020000000000,
            };

            xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
            ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

            /* Extract OUTER_UDP_CKSUM bit 41 and
             * move it to bit 61
             */
            xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
            ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

            /* Shift oltype by 2 to start nibble from BIT(56)
             * instead of BIT(58)
             */
            xtmp128 = vshrq_n_u8(xtmp128, 2);
            ytmp128 = vshrq_n_u8(ytmp128, 2);
            /*
             * E(48):L3_LEN(8):L2_LEN(z+7)
             * E(48):L3_LEN(8):L2_LEN(z+7)
             */
            const int8x16_t tshft3 = {
                -1, 0, 8, 8, 8, 8, 8, 8,
                -1, 0, 8, 8, 8, 8, 8, 8,
            };

            senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
            senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

            /* Do the lookup */
            ltypes01 = vqtbl1q_u8(tbl, xtmp128);
            ltypes23 = vqtbl1q_u8(tbl, ytmp128);

            /* Pick only relevant fields i.e. bits 56:63 of oltype
             * and place them in ol3/ol4type of senddesc_w1
             */
            const uint8x16_t shuf_mask0 = {
                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
            };

            ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
            ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

            /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
             * a [E(32):E(16):OL3(8):OL2(8)]
             * a = a + (a << 8)
             * a [E(32):E(16):(OL3+OL2):OL2]
             * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
             */
            senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                     vshlq_n_u16(senddesc01_w1, 8));
            senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                     vshlq_n_u16(senddesc23_w1, 8));

            /* Move ltypes to senddesc*_w1 */
            senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
            senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
        } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
            /* Lookup table to translate ol_flags to
             * ol4type, ol3type, il4type, il3type of senddesc_w1
             */
            const uint8x16x2_t tbl = {{
                {
                    /* [0-15] = il4type:il3type */
                    0x04, /* none (IPv6) */
                    0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
                    0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
                    0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
                    0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
                    0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
                           * RTE_MBUF_F_TX_TCP_CKSUM
                           */
                    0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
                           * RTE_MBUF_F_TX_SCTP_CKSUM
                           */
                    0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
                           * RTE_MBUF_F_TX_UDP_CKSUM
                           */
                    0x02, /* RTE_MBUF_F_TX_IPV4 */
                    0x12, /* RTE_MBUF_F_TX_IPV4 |
                           * RTE_MBUF_F_TX_TCP_CKSUM
                           */
                    0x22, /* RTE_MBUF_F_TX_IPV4 |
                           * RTE_MBUF_F_TX_SCTP_CKSUM
                           */
                    0x32, /* RTE_MBUF_F_TX_IPV4 |
                           * RTE_MBUF_F_TX_UDP_CKSUM
                           */
                    0x03, /* RTE_MBUF_F_TX_IPV4 |
                           * RTE_MBUF_F_TX_IP_CKSUM
                           */
                    0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                           * RTE_MBUF_F_TX_TCP_CKSUM
                           */
                    0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                           * RTE_MBUF_F_TX_SCTP_CKSUM
                           */
                    0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                           * RTE_MBUF_F_TX_UDP_CKSUM
                           */
                },

                {
                    /* [16-31] = ol4type:ol3type */
                    0x00, /* none */
                    0x03, /* OUTER_IP_CKSUM */
                    0x02, /* OUTER_IPV4 */
                    0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
                    0x04, /* OUTER_IPV6 */
                    0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
                    0x00, /* OUTER_IPV6 | OUTER_IPV4 */
                    0x00, /* OUTER_IPV6 | OUTER_IPV4 |
                           * OUTER_IP_CKSUM
                           */
                    0x00, /* OUTER_UDP_CKSUM */
                    0x33, /* OUTER_UDP_CKSUM |
                           * OUTER_IP_CKSUM
                           */
                    0x32, /* OUTER_UDP_CKSUM |
                           * OUTER_IPV4
                           */
                    0x33, /* OUTER_UDP_CKSUM |
                           * OUTER_IPV4 | OUTER_IP_CKSUM
                           */
                    0x34, /* OUTER_UDP_CKSUM |
                           * OUTER_IPV6
                           */
                    0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                           * OUTER_IP_CKSUM
                           */
                    0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                           * OUTER_IPV4
                           */
                    0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                           * OUTER_IPV4 | OUTER_IP_CKSUM
                           */
                },
            }};

            /* Extract olflags to translate to oltype & iltype */
            xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
            ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
            /*
             * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
             * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
             */
            const uint32x4_t tshft_4 = {
                1,
                0,
                1,
                0,
            };
            senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
            senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

            /*
             * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
             * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
             */
            const uint8x16_t shuf_mask5 = {
                0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
                0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
            };
            senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
            senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

            /* Extract outer and inner header ol_flags.
             * Mask composed of the OUTER_* flag bits (58:60, 41) and
             * the inner L4/IP_CKSUM/IPV4 flag bits (52:55).
             */
            const uint64x2_t oi_cksum_mask = {
                0x1CF0020000000000,
                0x1CF0020000000000,
            };

            xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
            ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

            /* Extract OUTER_UDP_CKSUM bit 41 and
             * move it to bit 61
             */
            xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
            ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

            /* Shift right oltype by 2 and iltype by 4
             * to start oltype nibble from BIT(58)
             * instead of BIT(56) and iltype nibble from BIT(48)
             * instead of BIT(52).
             */
            const int8x16_t tshft5 = {
                8, 8, 8, 8, 8, 8, -4, -2,
                8, 8, 8, 8, 8, 8, -4, -2,
            };

            xtmp128 = vshlq_u8(xtmp128, tshft5);
            ytmp128 = vshlq_u8(ytmp128, tshft5);
            /*
             * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
             * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
             */
            const int8x16_t tshft3 = {
                -1, 0, -1, 0, 0, 0, 0, 0,
                -1, 0, -1, 0, 0, 0, 0, 0,
            };

            senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
            senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

            /* Mark Bit(4) of oltype */
            const uint64x2_t oi_cksum_mask2 = {
                0x1000000000000000,
                0x1000000000000000,
            };

            xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
            ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

            /* Do the lookup */
            ltypes01 = vqtbl2q_u8(tbl, xtmp128);
            ltypes23 = vqtbl2q_u8(tbl, ytmp128);

            /* Pick only relevant fields i.e. bits 48:55 of iltype and
             * bits 56:63 of oltype and place them in the corresponding
             * place in senddesc_w1.
             */
            const uint8x16_t shuf_mask0 = {
                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
            };

            ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
            ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

            /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
             * l3len, l2len, ol3len, ol2len.
             * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
             * a = a + (a << 8)
             * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
             * a = a + (a << 16)
             * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
             * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
             */
            senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                     vshlq_n_u32(senddesc01_w1, 8));
            senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                     vshlq_n_u32(senddesc23_w1, 8));

            /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
            senddesc01_w1 = vaddq_u8(
                senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
            senddesc23_w1 = vaddq_u8(
                senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

            /* Move ltypes to senddesc*_w1 */
            senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
            senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
        }
        xmask01 = vdupq_n_u64(0);
        xmask23 = xmask01;
        asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
                     : [a] "+w"(xmask01)
                     : [in] "r"(mbuf0)
                     : "memory");

        asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
                     : [a] "+w"(xmask01)
                     : [in] "r"(mbuf1)
                     : "memory");

        asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
                     : [b] "+w"(xmask23)
                     : [in] "r"(mbuf2)
                     : "memory");

        asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
                     : [b] "+w"(xmask23)
                     : [in] "r"(mbuf3)
                     : "memory");
        xmask01 = vshlq_n_u64(xmask01, 20);
        xmask23 = vshlq_n_u64(xmask23, 20);

        senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
        senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
            /* Tx ol_flag for vlan. */
            const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
            /* Bit enable for VLAN1 */
            const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
            /* Tx ol_flag for QinQ. */
            const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
            /* Bit enable for VLAN0 */
            const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
            /* Load vlan values from packet. Outer is VLAN 0. */
            uint64x2_t ext01 = {
                ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
                    ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
                ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
                    ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
            };
            uint64x2_t ext23 = {
                ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
                    ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
                ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
                    ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
            };

            /* Get ol_flags of the packets. */
            xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
            ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

            /* ORR vlan outer/inner values into cmd. */
            sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
            sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

            /* Test for offload enable bits and generate masks. */
            xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
                                          mlv),
                                vandq_u64(vtstq_u64(xtmp128, olq),
                                          mlq));
            ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
                                          mlv),
                                vandq_u64(vtstq_u64(ytmp128, olq),
                                          mlq));

            /* Set vlan enable bits into cmd based on mask. */
            sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
            sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
        }
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
            /* Tx ol_flag for timestamp. */
            const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
                                    RTE_MBUF_F_TX_IEEE1588_TMST};
            /* Set send mem alg to SUB. */
            const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
            /* Increment send mem address by 8. */
            const uint64x2_t addr = {0x8, 0x8};

            xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
            ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

            /* Check if timestamp is requested and generate inverted
             * mask as we need not make any changes to the default cmd
             * value.
             */
            xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
            ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

            /* Change send mem address to an 8 byte offset when
             * TSTMP is disabled.
             */
            sendmem01_w1 = vaddq_u64(sendmem01_w1,
                                     vandq_u64(xtmp128, addr));
            sendmem23_w1 = vaddq_u64(sendmem23_w1,
                                     vandq_u64(ytmp128, addr));
            /* Change send mem alg to SUB when TSTMP is disabled. */
            sendmem01_w0 = vorrq_u64(sendmem01_w0,
                                     vandq_u64(xtmp128, alg));
            sendmem23_w0 = vorrq_u64(sendmem23_w0,
                                     vandq_u64(ytmp128, alg));

            cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
            cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
            cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
            cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
        }
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
            uint64_t sx_w0[NIX_DESCS_PER_LOOP];
            uint64_t sd_w1[NIX_DESCS_PER_LOOP];

            /* Extract SD W1 as we need to set L4 types. */
            vst1q_u64(sd_w1, senddesc01_w1);
            vst1q_u64(sd_w1 + 2, senddesc23_w1);

            /* Extract SX W0 as we need to set LSO fields. */
            vst1q_u64(sx_w0, sendext01_w0);
            vst1q_u64(sx_w0 + 2, sendext23_w0);

            /* Extract ol_flags. */
            xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
            ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

            /* Prepare individual mbufs. */
            cn9k_nix_prepare_tso(tx_pkts[0],
                (union nix_send_hdr_w1_u *)&sd_w1[0],
                (union nix_send_ext_w0_u *)&sx_w0[0],
                vgetq_lane_u64(xtmp128, 0), flags);

            cn9k_nix_prepare_tso(tx_pkts[1],
                (union nix_send_hdr_w1_u *)&sd_w1[1],
                (union nix_send_ext_w0_u *)&sx_w0[1],
                vgetq_lane_u64(xtmp128, 1), flags);

            cn9k_nix_prepare_tso(tx_pkts[2],
                (union nix_send_hdr_w1_u *)&sd_w1[2],
                (union nix_send_ext_w0_u *)&sx_w0[2],
                vgetq_lane_u64(ytmp128, 0), flags);

            cn9k_nix_prepare_tso(tx_pkts[3],
                (union nix_send_hdr_w1_u *)&sd_w1[3],
                (union nix_send_ext_w0_u *)&sx_w0[3],
                vgetq_lane_u64(ytmp128, 1), flags);

            senddesc01_w1 = vld1q_u64(sd_w1);
            senddesc23_w1 = vld1q_u64(sd_w1 + 2);

            sendext01_w0 = vld1q_u64(sx_w0);
            sendext23_w0 = vld1q_u64(sx_w0 + 2);
        }
        if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
            !(flags & NIX_TX_MULTI_SEG_F)) {
            /* Set don't free bit if reference count > 1 */
            xmask01 = vdupq_n_u64(0);
            xmask23 = xmask01;

            /* Move mbufs to iova */
            mbuf0 = (uint64_t *)tx_pkts[0];
            mbuf1 = (uint64_t *)tx_pkts[1];
            mbuf2 = (uint64_t *)tx_pkts[2];
            mbuf3 = (uint64_t *)tx_pkts[3];

            if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
                xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
            else
                RTE_MEMPOOL_CHECK_COOKIES(
                    ((struct rte_mbuf *)mbuf0)->pool,
                    (void **)&mbuf0, 1, 0);

            if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
                xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
            else
                RTE_MEMPOOL_CHECK_COOKIES(
                    ((struct rte_mbuf *)mbuf1)->pool,
                    (void **)&mbuf1, 1, 0);

            if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
                xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
            else
                RTE_MEMPOOL_CHECK_COOKIES(
                    ((struct rte_mbuf *)mbuf2)->pool,
                    (void **)&mbuf2, 1, 0);

            if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
                xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
            else
                RTE_MEMPOOL_CHECK_COOKIES(
                    ((struct rte_mbuf *)mbuf3)->pool,
                    (void **)&mbuf3, 1, 0);
            senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
            senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
            /* Ensuring mbuf fields which got updated in
             * cnxk_nix_prefree_seg are written before LMTST.
             */
            rte_io_wmb();
        } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
            /* Move mbufs to iova */
            mbuf0 = (uint64_t *)tx_pkts[0];
            mbuf1 = (uint64_t *)tx_pkts[1];
            mbuf2 = (uint64_t *)tx_pkts[2];
            mbuf3 = (uint64_t *)tx_pkts[3];

            /* Mark mempool object as "put" since
             * it is freed by NIX
             */
            RTE_MEMPOOL_CHECK_COOKIES(
                ((struct rte_mbuf *)mbuf0)->pool,
                (void **)&mbuf0, 1, 0);

            RTE_MEMPOOL_CHECK_COOKIES(
                ((struct rte_mbuf *)mbuf1)->pool,
                (void **)&mbuf1, 1, 0);

            RTE_MEMPOOL_CHECK_COOKIES(
                ((struct rte_mbuf *)mbuf2)->pool,
                (void **)&mbuf2, 1, 0);

            RTE_MEMPOOL_CHECK_COOKIES(
                ((struct rte_mbuf *)mbuf3)->pool,
                (void **)&mbuf3, 1, 0);
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
            rte_io_wmb();
#endif
        }
        /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
        cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
        cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
        cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
        cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

        cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
        cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
        cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
        cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

        if (flags & NIX_TX_NEED_EXT_HDR) {
            cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
            cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
            cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
            cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
        }
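        /* After the zips above, each cmdN[k] holds one packet's descriptor
         * pair: cmd0 = SEND_HDR W0|W1, cmd2 = SEND_EXT W0|W1 (when used),
         * cmd1 = SEND_SG W0|W1 and cmd3 = SEND_MEM W0|W1 (timestamp),
         * stored to the LMTLINE in that order below.
         */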
        if (flags & NIX_TX_MULTI_SEG_F) {
            uint64_t seg_list[NIX_DESCS_PER_LOOP]
                             [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
            uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];

            /* Build mseg list for each packet individually. */
            for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
                segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
                                                     seg_list[j], &cmd0[j],
                                                     &cmd1[j], flags);

            /* Commit all changes to mbuf before LMTST. */
            if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                rte_io_wmb();

            cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
                                           segdw, seg_list,
                                           lmt_addr, io_addr,
                                           flags);
        } else if (flags & NIX_TX_NEED_EXT_HDR) {
            /* With ext header in the command we can no longer send
             * all 4 packets together since LMTLINE is 128 bytes.
             * Split and Tx twice.
             */
            do {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                    vst1q_u64(lmt_addr, cmd0[0]);
                    vst1q_u64(lmt_addr + 2, cmd2[0]);
                    vst1q_u64(lmt_addr + 4, cmd1[0]);
                    vst1q_u64(lmt_addr + 6, cmd3[0]);
                    vst1q_u64(lmt_addr + 8, cmd0[1]);
                    vst1q_u64(lmt_addr + 10, cmd2[1]);
                    vst1q_u64(lmt_addr + 12, cmd1[1]);
                    vst1q_u64(lmt_addr + 14, cmd3[1]);
                } else {
                    vst1q_u64(lmt_addr, cmd0[0]);
                    vst1q_u64(lmt_addr + 2, cmd2[0]);
                    vst1q_u64(lmt_addr + 4, cmd1[0]);
                    vst1q_u64(lmt_addr + 6, cmd0[1]);
                    vst1q_u64(lmt_addr + 8, cmd2[1]);
                    vst1q_u64(lmt_addr + 10, cmd1[1]);
                }
                lmt_status = roc_lmt_submit_ldeor(io_addr);
            } while (lmt_status == 0);

            do {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                    vst1q_u64(lmt_addr, cmd0[2]);
                    vst1q_u64(lmt_addr + 2, cmd2[2]);
                    vst1q_u64(lmt_addr + 4, cmd1[2]);
                    vst1q_u64(lmt_addr + 6, cmd3[2]);
                    vst1q_u64(lmt_addr + 8, cmd0[3]);
                    vst1q_u64(lmt_addr + 10, cmd2[3]);
                    vst1q_u64(lmt_addr + 12, cmd1[3]);
                    vst1q_u64(lmt_addr + 14, cmd3[3]);
                } else {
                    vst1q_u64(lmt_addr, cmd0[2]);
                    vst1q_u64(lmt_addr + 2, cmd2[2]);
                    vst1q_u64(lmt_addr + 4, cmd1[2]);
                    vst1q_u64(lmt_addr + 6, cmd0[3]);
                    vst1q_u64(lmt_addr + 8, cmd2[3]);
                    vst1q_u64(lmt_addr + 10, cmd1[3]);
                }
                lmt_status = roc_lmt_submit_ldeor(io_addr);
            } while (lmt_status == 0);
        } else {
            do {
                vst1q_u64(lmt_addr, cmd0[0]);
                vst1q_u64(lmt_addr + 2, cmd1[0]);
                vst1q_u64(lmt_addr + 4, cmd0[1]);
                vst1q_u64(lmt_addr + 6, cmd1[1]);
                vst1q_u64(lmt_addr + 8, cmd0[2]);
                vst1q_u64(lmt_addr + 10, cmd1[2]);
                vst1q_u64(lmt_addr + 12, cmd0[3]);
                vst1q_u64(lmt_addr + 14, cmd1[3]);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
            } while (lmt_status == 0);
        }
        tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
    }

    if (unlikely(pkts_left)) {
        if (flags & NIX_TX_MULTI_SEG_F)
            pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
                                            pkts_left, cmd, flags);
        else
            pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
                                       cmd, flags);
    }

    return pkts;
}

#else
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                          uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
    RTE_SET_USED(tx_queue);
    RTE_SET_USED(tx_pkts);
    RTE_SET_USED(pkts);
    RTE_SET_USED(cmd);
    RTE_SET_USED(flags);
    return 0;
}
#endif
#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F	     NIX_TX_OFFLOAD_SECURITY_F

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES_0_15                                            \
	T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                 \
	T(l3l4csum, 4, L3L4CSUM_F)                                            \
	T(ol3ol4csum, 4, OL3OL4CSUM_F)                                        \
	T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(vlan, 6, VLAN_F)                                                    \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                              \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                          \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
	T(noff, 4, NOFF_F)                                                    \
	T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                              \
	T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                          \
	T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
	T(noff_vlan, 6, NOFF_F | VLAN_F)                                      \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                   \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_16_31                                           \
	T(tso, 6, TSO_F)                                                      \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                            \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)      \
	T(tso_vlan, 6, TSO_F | VLAN_F)                                        \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                  \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)              \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                    \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(tso_noff, 6, TSO_F | NOFF_F)                                        \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                  \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)              \
	T(tso_noff_ol3ol4csum_l3l4csum, 6,                                    \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                          \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)    \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                               \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_32_47                                           \
	T(ts, 8, TSP_F)                                                       \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                 \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                             \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(ts_vlan, 8, TSP_F | VLAN_F)                                         \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                   \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)               \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                     \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(ts_noff, 8, TSP_F | NOFF_F)                                         \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                   \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)               \
	T(ts_noff_ol3ol4csum_l3l4csum, 8,                                     \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                           \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_48_63                                           \
	T(ts_tso, 8, TSP_F | TSO_F)                                           \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                     \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                 \
	T(ts_tso_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                             \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)       \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)   \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                 \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                             \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)       \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)   \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                 \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)               \
	T(ts_tso_noff_vlan_l3l4csum, 8,                                       \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                       \
	T(ts_tso_noff_vlan_ol3ol4csum, 8,                                     \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                     \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                            \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_64_79                                           \
	T(sec, 4, T_SEC_F)                                                    \
	T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                              \
	T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                          \
	T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
	T(sec_vlan, 6, T_SEC_F | VLAN_F)                                      \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)            \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                    \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                       \
	T(sec_noff, 4, T_SEC_F | NOFF_F)                                      \
	T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                \
	T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)            \
	T(sec_noff_ol3ol4csum_l3l4csum, 4,                                    \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                       \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                        \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)  \
	T(sec_noff_vlan_ol3ol4csum, 6,                                        \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                           \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                               \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_80_95                                           \
	T(sec_tso, 6, T_SEC_F | TSO_F)                                        \
	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                  \
	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)              \
	T(sec_tso_ol3ol4csum_l3l4csum, 6,                                     \
	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                          \
	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)    \
	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)               \
	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                          \
	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)    \
	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                \
	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)               \
	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)            \
	T(sec_tso_noff_vlan_l3l4csum, 6,                                      \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                     \
	T(sec_tso_noff_vlan_ol3ol4csum, 6,                                    \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                   \
	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                           \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_96_111                                          \
	T(sec_ts, 8, T_SEC_F | TSP_F)                                         \
	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                   \
	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)               \
	T(sec_ts_ol3ol4csum_l3l4csum, 8,                                      \
	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                           \
	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)     \
	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                 \
	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)               \
	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                           \
	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)     \
	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                 \
	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)               \
	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)             \
	T(sec_ts_noff_vlan_l3l4csum, 8,                                       \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                     \
	T(sec_ts_noff_vlan_ol3ol4csum, 8,                                     \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                   \
	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                            \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES_112_127                                         \
	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                             \
	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)       \
	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)   \
	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)               \
	T(sec_ts_tso_vlan_l3l4csum, 8,                                        \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_ts_tso_vlan_ol3ol4csum, 8,                                      \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)               \
	T(sec_ts_tso_noff_l3l4csum, 8,                                        \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                      \
	T(sec_ts_tso_noff_ol3ol4csum, 8,                                      \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                    \
	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                             \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \
	T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                   \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)             \
	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                 \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)           \
	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                        \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |          \
	  L3L4CSUM_F)
#define NIX_TX_FASTPATH_MODES                                                 \
	NIX_TX_FASTPATH_MODES_0_15                                            \
	NIX_TX_FASTPATH_MODES_16_31                                           \
	NIX_TX_FASTPATH_MODES_32_47                                           \
	NIX_TX_FASTPATH_MODES_48_63                                           \
	NIX_TX_FASTPATH_MODES_64_79                                           \
	NIX_TX_FASTPATH_MODES_80_95                                           \
	NIX_TX_FASTPATH_MODES_96_111                                          \
	NIX_TX_FASTPATH_MODES_112_127
#define T(name, sz, flags)                                                    \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);    \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);    \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);    \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
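/* Example expansion of the table above: the T(vlan, 6, VLAN_F) row declares
 * cn9k_nix_xmit_pkts_vlan(), cn9k_nix_xmit_pkts_mseg_vlan(),
 * cn9k_nix_xmit_pkts_vec_vlan() and cn9k_nix_xmit_pkts_vec_mseg_vlan(),
 * which the NIX_TX_XMIT* macros below define with sz = 6 command dwords
 * and flags = VLAN_F.
 */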
#define NIX_TX_XMIT(fn, sz, flags)                                            \
	uint16_t __rte_noinline __rte_hot fn(                                 \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)     \
	{                                                                     \
		uint64_t cmd[sz];                                             \
		/* For TSO inner checksum is a must */                        \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                       \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                 \
			return 0;                                             \
		return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,       \
					  (flags));                           \
	}
#define NIX_TX_XMIT_MSEG(fn, sz, flags)                                       \
	uint16_t __rte_noinline __rte_hot fn(                                 \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)     \
	{                                                                     \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];          \
		/* For TSO inner checksum is a must */                        \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                       \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                 \
			return 0;                                             \
		return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,  \
					       (flags) | NIX_TX_MULTI_SEG_F); \
	}
#define NIX_TX_XMIT_VEC(fn, sz, flags)                                        \
	uint16_t __rte_noinline __rte_hot fn(                                 \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)     \
	{                                                                     \
		uint64_t cmd[sz];                                             \
		/* For TSO inner checksum is a must */                        \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                       \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                 \
			return 0;                                             \
		return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts,     \
						 cmd, (flags));               \
	}
#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                   \
	uint16_t __rte_noinline __rte_hot fn(                                 \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)     \
	{                                                                     \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];          \
		/* For TSO inner checksum is a must */                        \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                       \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                 \
			return 0;                                             \
		return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts,     \
						 cmd, (flags) |               \
						 NIX_TX_MULTI_SEG_F);         \
	}
#endif /* __CN9K_TX_H__ */