/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN9K_TX_H__
#define __CN9K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX	      (NIX_TX_OFFLOAD_SECURITY_F << 1)
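
/* The low offload bits double as an index into the NIX_TX_FASTPATH_MODES
 * tables at the end of this file: each combination of the seven
 * NIX_TX_OFFLOAD_* bits selects one specialized Tx function. As an
 * illustrative sketch (not part of the driver), a queue configured with
 * no-fast-free and VLAN insertion would use:
 *
 *	const uint16_t flags = NIX_TX_OFFLOAD_MBUF_NOFF_F |
 *			       NIX_TX_OFFLOAD_VLAN_QINQ_F;
 *
 * i.e. index 12 (0xC), which is the "noff_vlan" entry in
 * NIX_TX_FASTPATH_MODES_0_15 below.
 */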

/* Flags to control xmit_prepare function.
 * Defined from the far end (bit 15) to denote that it is
 * not used as an offload flag to pick the Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update fc_cache_pkts */                \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqes_per_sqb to express in pkts */    \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check again for room */                             \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
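
/* Illustrative flow-control arithmetic (numbers are examples only): with
 * nb_sqb_bufs_adj = 1024 SQB buffers reserved for the SQ, *fc_mem = 1000
 * buffers currently consumed by hardware, and sqes_per_sqb_log2 = 5
 * (32 SQEs per SQB), the refreshed cache is
 *
 *	fc_cache_pkts = (1024 - 1000) << 5 = 768 packets
 *
 * so bursts totalling up to 768 packets can be enqueued before the shared
 * fc_mem counter has to be read again.
 */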

/* Function to determine the number of Tx subdescriptors required when the
 * extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn9k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		       ? 2
		       : ((flags &
			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
				  ? 1
				  : 0);
}
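
/* Summary of the expression above, not new behavior: TSTAMP needs two
 * extended subdescriptors (SEND_EXT plus SEND_MEM), VLAN/QINQ or TSO needs
 * one (SEND_EXT only), and every other combination needs none.
 */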

static __rte_always_inline void
cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
		     const uint16_t flags, const uint16_t static_sz)
{
	if (static_sz)
		cmd[0] = txq->send_hdr_w0;
	else
		cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
			 ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
	cmd[1] = 0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
			cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
		else
			cmd[2] = NIX_SUBDC_EXT << 60;
		cmd[3] = 0;
		cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	} else {
		cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	}
}
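
/* Command layout produced by the skeleton, derived from the stores above:
 *
 *	with NIX_TX_NEED_EXT_HDR:    cmd[0..1] SEND_HDR, cmd[2..3] SEND_EXT,
 *	                             cmd[4..5] SEND_SG
 *	without:                     cmd[0..1] SEND_HDR, cmd[2..3] SEND_SG
 *
 * BIT_ULL(48) in the SG word pre-sets a one-segment count (the segs field
 * is assumed to start at bit 48), and BIT_ULL(15) in the EXT word is the
 * bit used by the timestamp path.
 */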

static __rte_always_inline void
cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       RTE_MBUF_F_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
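
/* Worked example (illustrative numbers): for a plain IPv4/TCP TSO packet
 * with l2_len = 14, l3_len = 20, l4_len = 20 and pkt_len = 1514, the LSO
 * start byte is lso_sb = 14 + 20 + 20 = 54 and paylen = 1514 - 54 = 1460.
 * The length field is then reduced by 1460 so that hardware can re-add the
 * per-segment payload length for every generated segment. The
 * (2 << !!IPV6) term lands on the IPv4 total-length field at header offset
 * 2 or the IPv6 payload-length field at offset 4.
 */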

static __rte_always_inline void
cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
		      const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F))
		send_hdr->w0.total = m->data_len;
	else
		send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

		/* If there is no tunnel header, shift the IL3/IL4 fields
		 * down so that OL3/OL4 are used for the header checksum
		 * instead.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));
	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);
	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
	}
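	/* Sketch of the no-tunnel shift above, assuming the W1 layout
	 * implied by this file (byte-wide OL3/OL4/IL3/IL4 pointers in the
	 * low 32 bits, nibble-wide type fields in the high 32 bits): when
	 * ol3type == 0 there is no tunnel, so mask == 1 and the high half
	 * shifts right by 8 (IL3TYPE/IL4TYPE move into OL3TYPE/OL4TYPE)
	 * while the low half shifts right by 16 (IL3PTR/IL4PTR move into
	 * OL3PTR/OL4PTR). With a tunnel present, mask == 0, both shifts
	 * are by zero, and W1 is left unchanged.
	 */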

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uint16_t lso_sb;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 *	    mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
			/* Ensuring mbuf fields which got updated in
			 * cnxk_nix_prefree_seg are written before LMTST.
			 */
			rte_io_wmb();
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	} else {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		/* NOFF is handled later for multi-seg */
	}
}
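
/* Single-segment SG layout written above: the SEND_SG header dword carries
 * seg1_size in its low bits and the following dword holds the buffer IOVA,
 * i.e. for an mbuf with data_len = 64 the pair is
 * (sg-header | 64, rte_mbuf_data_iova(m)). The multi-segment variant of
 * this layout is built in cn9k_nix_prepare_mseg() below.
 */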

static __rte_always_inline void
cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
			     const uint64_t ol_flags, const uint16_t no_segdw,
			     const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		struct nix_send_mem_s *send_mem;
		uint16_t off = (no_segdw - 1) << 1;
		const uint8_t is_ol_tstamp =
			!(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);

		send_mem = (struct nix_send_mem_s *)(cmd + off);

		/* For packets without RTE_MBUF_F_TX_IEEE1588_TMST, the Tx
		 * timestamp must not be recorded; change the alg type to
		 * NIX_SENDMEMALG_SUB and point the send mem addr field to
		 * the next 8 bytes so that the actual Tx tstamp registered
		 * address is not corrupted.
		 */
		send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
		send_mem->w0.cn9k.alg =
			NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);

		send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
					      is_ol_tstamp);
	}
}
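
/* The two cases above, spelled out: when the mbuf requests a timestamp
 * (is_ol_tstamp == 0), alg = NIX_SENDMEMALG_SETTSTMP and addr = ts_mem,
 * so hardware stores the Tx timestamp there. When it does not
 * (is_ol_tstamp == 1), alg becomes NIX_SENDMEMALG_SETTSTMP + 8 (the SUB
 * algorithm, per the alg encoding assumed here) and addr moves to
 * ts_mem + 8 bytes, a scratch slot, leaving the real timestamp intact.
 */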

static __rte_always_inline void
cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
		  const uint32_t flags)
{
	uint64_t lmt_status;

	do {
		roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
{
	roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
{
	return roc_lmt_submit_ldeor(io_addr);
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
{
	return roc_lmt_submit_ldeorl(io_addr);
}
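
/* Illustrative sketch of the split prepare/submit form; this exact call
 * sequence is an assumption (callers live outside this file), mirroring
 * the retry structure of cn9k_nix_xmit_one():
 *
 *	do {
 *		cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
 *	} while (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0);
 *
 * The command is re-copied on every retry because an LDEOR that returns 0
 * means the LMTST did not take effect. The _release variant uses LDEORL
 * to add release ordering to the submission.
 */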

static __rte_always_inline uint16_t
cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];

	/* Start from second segment, first segment is already there */
	i = 1;
	sg_u = sg->u;
	nb_segs = m->nb_segs - 1;
	m_next = m->next;
	slist = &cmd[3 + off + 1];

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
		rte_io_wmb();
	}

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif
	m = m_next;
	if (!m)
		goto done;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
			/* Commit changes to mbuf */
			rte_io_wmb();
		}
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

done:
	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
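
/* Worked segdw example (sizes are illustrative): a 3-segment mbuf chain
 * with the extended header enabled (off = 2) occupies the SEND_SG header
 * dword, the first-segment IOVA already in cmd, and two extra IOVA
 * dwords, so slist ends 4 dwords past &cmd[2 + off]. Rounded up, that is
 * 2 units of 16 bytes; adding (off >> 1) + 1 = 2 default units for
 * SEND_HDR and SEND_EXT gives segdw = 4, i.e. a 64-byte command with
 * sizem1 = 3.
 */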

static __rte_always_inline void
cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
{
	roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
		       uint16_t segdw)
{
	uint64_t lmt_status;

	do {
		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
			       rte_iova_t io_addr, uint16_t segdw)
{
	uint64_t lmt_status;

	rte_io_wmb();
	do {
		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		   uint64_t *cmd, const uint16_t flags)
{
	struct cn9k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	void *lmt_addr = txq->lmt_addr;
	uint64_t lso_tun_fmt;
	uint16_t i;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;

		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Commit any changes to the packets here, as no further writes to
	 * the packets are done below unless no-fast-free (MBUF_NOFF) is
	 * enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
					     flags);
		cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
	}

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}
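
/* Per-burst flow in the scalar path above, summarized: a single command
 * buffer is reused for every packet; prepare() rewrites the per-packet
 * words, prepare_tstamp() patches the trailing SEND_MEM dwords (the
 * constant no_segdw of 4 matches the fixed single-segment layout with the
 * extended header present), and xmit_one() copies the command to the LMT
 * line and issues LDEOR until the submission sticks.
 */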

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn9k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	void *lmt_addr = txq->lmt_addr;
	uint64_t lso_tun_fmt;
	uint16_t segdw;
	uint64_t i;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;

		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Commit any changes to the packets here, as no further writes to
	 * the packets are done below unless no-fast-free (MBUF_NOFF) is
	 * enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
		segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
					     segdw, flags);
		cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
	}

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		     union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		     uint64_t flags)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
			0x1;

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format += is_udp_tun ? 2 : 6;

		w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
	}
}
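
/* LSO format selection above, spelled out with the offsets used in this
 * helper: the scalar path looks formats up via lso_tun_fmt, while this
 * vector helper derives them as fixed offsets from
 * NIX_LSO_FORMAT_IDX_TSOV4: +1 for inner IPv6, then for tunnels +2 (UDP
 * tunnel) or +6 (other), plus +2 for an outer IPv6 header. This assumes
 * the LSO formats were registered in that fixed order at device init.
 */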

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
			       union nix_send_hdr_w0_u *sh,
			       union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	uint64_t segdw;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif

	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX
		 */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];

	segdw += 2;
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
		 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	sh->sizem1 = segdw - 1;

	return segdw;
}

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			  uint64x2_t *cmd1, const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;
	uint8_t ret;

	if (m->nb_segs == 1) {
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
		       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	}

	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);

	return ret;
}
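
/* Return value of the helper above, illustrated: a single-segment packet
 * with neither EXT header nor timestamp costs 2 LMT dwords (SEND_HDR +
 * SEND_SG); each enabled feature adds one. Multi-segment packets return
 * whatever cn9k_nix_prepare_mseg_vec_list() computed from the segment
 * list, and these per-packet counts drive the LMT-line packing decisions
 * in cn9k_nix_xmit_pkts_mseg_vector() below.
 */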

#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline void
cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
			       uint64x2_t *cmd2, uint64x2_t *cmd3,
			       uint8_t *segdw,
			       uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
			       uint64_t *lmt_addr, rte_iova_t io_addr,
			       const uint32_t flags)
{
	uint64_t lmt_status;
	uint8_t j, off;

	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
		/* No extra segments in the 4 consecutive packets: all four
		 * two-dword commands fit in a single LMT line.
		 */
		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
			do {
				vst1q_u64(lmt_addr, cmd0[0]);
				vst1q_u64(lmt_addr + 2, cmd1[0]);
				vst1q_u64(lmt_addr + 4, cmd0[1]);
				vst1q_u64(lmt_addr + 6, cmd1[1]);
				vst1q_u64(lmt_addr + 8, cmd0[2]);
				vst1q_u64(lmt_addr + 10, cmd1[2]);
				vst1q_u64(lmt_addr + 12, cmd0[3]);
				vst1q_u64(lmt_addr + 14, cmd1[3]);
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);

			return;
		}
	}

	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
		/* Fit consecutive packets in same LMTLINE. */
		if ((segdw[j] + segdw[j + 1]) <= 8) {
			do {
				if ((flags & NIX_TX_NEED_EXT_HDR) &&
				    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
					vst1q_u64(lmt_addr, cmd0[j]);
					vst1q_u64(lmt_addr + 2, cmd2[j]);
					vst1q_u64(lmt_addr + 4, cmd1[j]);
					/* Copy segs */
					off = segdw[j] - 4;
					roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
					off <<= 1;
					vst1q_u64(lmt_addr + 6 + off, cmd3[j]);

					vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
					vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
					vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
					roc_lmt_mov_seg(lmt_addr + 14 + off,
							slist[j + 1], segdw[j + 1] - 4);
					off += ((segdw[j + 1] - 4) << 1);
					vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
				} else if (flags & NIX_TX_NEED_EXT_HDR) {
					vst1q_u64(lmt_addr, cmd0[j]);
					vst1q_u64(lmt_addr + 2, cmd2[j]);
					vst1q_u64(lmt_addr + 4, cmd1[j]);
					/* Copy segs */
					off = segdw[j] - 3;
					roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
					off <<= 1;
					vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
					vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
					vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
					roc_lmt_mov_seg(lmt_addr + 12 + off,
							slist[j + 1], segdw[j + 1] - 3);
				} else {
					vst1q_u64(lmt_addr, cmd0[j]);
					vst1q_u64(lmt_addr + 2, cmd1[j]);
					/* Copy segs */
					off = segdw[j] - 2;
					roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
					off <<= 1;
					vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
					vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
					roc_lmt_mov_seg(lmt_addr + 8 + off,
							slist[j + 1], segdw[j + 1] - 2);
				}
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);
			j += 2;
		} else {
			do {
				if ((flags & NIX_TX_NEED_EXT_HDR) &&
				    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
					vst1q_u64(lmt_addr, cmd0[j]);
					vst1q_u64(lmt_addr + 2, cmd2[j]);
					vst1q_u64(lmt_addr + 4, cmd1[j]);
					/* Copy segs */
					off = segdw[j] - 4;
					roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
					off <<= 1;
					vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
				} else if (flags & NIX_TX_NEED_EXT_HDR) {
					vst1q_u64(lmt_addr, cmd0[j]);
					vst1q_u64(lmt_addr + 2, cmd2[j]);
					vst1q_u64(lmt_addr + 4, cmd1[j]);
					/* Copy segs */
					off = segdw[j] - 3;
					roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
				} else {
					vst1q_u64(lmt_addr, cmd0[j]);
					vst1q_u64(lmt_addr + 2, cmd1[j]);
					/* Copy segs */
					off = segdw[j] - 2;
					roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
				}
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);
			j++;
		}
	}
}
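
/* LMT-line packing rule used above, with an example: a line holds 8
 * two-word units (128 bytes), so two packets are coalesced into one LDEOR
 * whenever segdw[j] + segdw[j + 1] <= 8, e.g. a 4-unit multi-seg command
 * paired with a 4-unit neighbor. segdw[4] is primed to 8 by the caller so
 * that the last packet of the group is never paired past the end of the
 * loop.
 */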

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn9k_eth_txq *txq = tx_queue;
	uint64_t *lmt_addr = txq->lmt_addr;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint64_t lmt_status, i;
	uint16_t pkts_left;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Commit any changes to the packets here, as no further writes to
	 * the packets are done below unless no-fast-free (MBUF_NOFF) is
	 * enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	senddesc23_w0 = senddesc01_w0;

	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
	sgdesc23_w0 = sgdesc01_w0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
						   BIT_ULL(15));
			sendmem01_w0 =
				vdupq_n_u64((NIX_SUBDC_MEM << 60) |
					    (NIX_SENDMEMALG_SETTSTMP << 56));
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
			sendmem23_w1 = sendmem01_w1;
		} else {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
		}
		sendext23_w0 = sendext01_w0;

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
			sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		else
			sendext01_w1 = vdupq_n_u64(0);
		sendext23_w1 = sendext01_w1;
	}

	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB */
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			/* Clear the LSO enable bit. */
			sendext01_w0 = vbicq_u64(sendext01_w0,
						 vdupq_n_u64(BIT_ULL(14)));
			sendext23_w0 = sendext01_w0;
		}

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbufs' olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbufs to point pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));

		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen present at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* OR both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
				0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
				0x02, /* RTE_MBUF_F_TX_IPV4 */
				0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
				0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
				0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_TCP_CKSUM
				       */
				0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_SCTP_CKSUM
				       */
				0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and rest
			 * of the bits to be zero.
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);

			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);

			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
					0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
					0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
					0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
					0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x02, /* RTE_MBUF_F_TX_IPV4 */
					0x12, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x22, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x32, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x03, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_IP_CKSUM
					       */
					0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype and
			 * Bit 56:63 of oltype and place it in corresponding
			 * place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
						RTE_MBUF_F_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate inverted
			 * mask as we need not make any changes to default cmd
			 * value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));

			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
			uint64_t sd_w1[NIX_DESCS_PER_LOOP];

			/* Extract SD W1 as we need to set L4 types. */
			vst1q_u64(sd_w1, senddesc01_w1);
			vst1q_u64(sd_w1 + 2, senddesc23_w1);

			/* Extract SX W0 as we need to set LSO fields. */
			vst1q_u64(sx_w0, sendext01_w0);
			vst1q_u64(sx_w0 + 2, sendext23_w0);

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Prepare individual mbufs. */
			cn9k_nix_prepare_tso(tx_pkts[0],
				(union nix_send_hdr_w1_u *)&sd_w1[0],
				(union nix_send_ext_w0_u *)&sx_w0[0],
				vgetq_lane_u64(xtmp128, 0), flags);

			cn9k_nix_prepare_tso(tx_pkts[1],
				(union nix_send_hdr_w1_u *)&sd_w1[1],
				(union nix_send_ext_w0_u *)&sx_w0[1],
				vgetq_lane_u64(xtmp128, 1), flags);

			cn9k_nix_prepare_tso(tx_pkts[2],
				(union nix_send_hdr_w1_u *)&sd_w1[2],
				(union nix_send_ext_w0_u *)&sx_w0[2],
				vgetq_lane_u64(ytmp128, 0), flags);

			cn9k_nix_prepare_tso(tx_pkts[3],
				(union nix_send_hdr_w1_u *)&sd_w1[3],
				(union nix_send_ext_w0_u *)&sx_w0[3],
				vgetq_lane_u64(ytmp128, 1), flags);

			senddesc01_w1 = vld1q_u64(sd_w1);
			senddesc23_w1 = vld1q_u64(sd_w1 + 2);

			sendext01_w0 = vld1q_u64(sx_w0);
			sendext23_w0 = vld1q_u64(sx_w0 + 2);
		}

		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
		    !(flags & NIX_TX_MULTI_SEG_F)) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
			/* Ensuring mbuf fields which got updated in
			 * cnxk_nix_prefree_seg are written before LMTST.
			 */
			rte_io_wmb();
		} else if (!(flags & NIX_TX_MULTI_SEG_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
			rte_io_wmb();
#endif
		}

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}

		if (flags & NIX_TX_MULTI_SEG_F) {
			uint64_t seg_list[NIX_DESCS_PER_LOOP]
					 [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
			uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];

			/* Build mseg list for each packet individually. */
			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
				segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
							seg_list[j], &cmd0[j],
							&cmd1[j], flags);
			segdw[4] = 8;

			/* Commit all changes to mbuf before LMTST. */
			if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
				rte_io_wmb();

			cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
						       segdw, seg_list,
						       lmt_addr, io_addr,
						       flags);
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* With ext header in the command we can no longer send
			 * all 4 packets together since LMTLINE is 128 bytes.
			 * Split and Tx twice.
			 */
			do {
				if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
					vst1q_u64(lmt_addr, cmd0[0]);
					vst1q_u64(lmt_addr + 2, cmd2[0]);
					vst1q_u64(lmt_addr + 4, cmd1[0]);
					vst1q_u64(lmt_addr + 6, cmd3[0]);
					vst1q_u64(lmt_addr + 8, cmd0[1]);
					vst1q_u64(lmt_addr + 10, cmd2[1]);
					vst1q_u64(lmt_addr + 12, cmd1[1]);
					vst1q_u64(lmt_addr + 14, cmd3[1]);
				} else {
					vst1q_u64(lmt_addr, cmd0[0]);
					vst1q_u64(lmt_addr + 2, cmd2[0]);
					vst1q_u64(lmt_addr + 4, cmd1[0]);
					vst1q_u64(lmt_addr + 6, cmd0[1]);
					vst1q_u64(lmt_addr + 8, cmd2[1]);
					vst1q_u64(lmt_addr + 10, cmd1[1]);
				}
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);

			do {
				if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
					vst1q_u64(lmt_addr, cmd0[2]);
					vst1q_u64(lmt_addr + 2, cmd2[2]);
					vst1q_u64(lmt_addr + 4, cmd1[2]);
					vst1q_u64(lmt_addr + 6, cmd3[2]);
					vst1q_u64(lmt_addr + 8, cmd0[3]);
					vst1q_u64(lmt_addr + 10, cmd2[3]);
					vst1q_u64(lmt_addr + 12, cmd1[3]);
					vst1q_u64(lmt_addr + 14, cmd3[3]);
				} else {
					vst1q_u64(lmt_addr, cmd0[2]);
					vst1q_u64(lmt_addr + 2, cmd2[2]);
					vst1q_u64(lmt_addr + 4, cmd1[2]);
					vst1q_u64(lmt_addr + 6, cmd0[3]);
					vst1q_u64(lmt_addr + 8, cmd2[3]);
					vst1q_u64(lmt_addr + 10, cmd1[3]);
				}
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);
		} else {
			do {
				vst1q_u64(lmt_addr, cmd0[0]);
				vst1q_u64(lmt_addr + 2, cmd1[0]);
				vst1q_u64(lmt_addr + 4, cmd0[1]);
				vst1q_u64(lmt_addr + 6, cmd1[1]);
				vst1q_u64(lmt_addr + 8, cmd0[2]);
				vst1q_u64(lmt_addr + 10, cmd1[2]);
				vst1q_u64(lmt_addr + 12, cmd0[3]);
				vst1q_u64(lmt_addr + 14, cmd1[3]);
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);
		}
		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	if (unlikely(pkts_left)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
							pkts_left, cmd, flags);
		else
			pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
						   cmd, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F	     NIX_TX_OFFLOAD_SECURITY_F

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES_0_15                                             \
	T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                  \
	T(l3l4csum, 4, L3L4CSUM_F)                                             \
	T(ol3ol4csum, 4, OL3OL4CSUM_F)                                         \
	T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                   \
	T(vlan, 6, VLAN_F)                                                     \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff, 4, NOFF_F)                                                     \
	T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                               \
	T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                           \
	T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
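
/* Illustrative expansion of these tables (the exact T() definition lives
 * in the .c files, so this is a sketch of the pattern, not a copy): each
 * entry names a function, its command-buffer size in dwords, and its
 * offload flags, e.g.
 *
 *	#define T(name, sz, flags)                                            \
 *		uint16_t __rte_noinline __rte_hot                             \
 *		cn9k_nix_xmit_pkts_##name(void *tx_queue,                     \
 *					  struct rte_mbuf **tx_pkts,          \
 *					  uint16_t pkts)                      \
 *		{                                                             \
 *			uint64_t cmd[sz];                                     \
 *			return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts,    \
 *						  cmd, (flags));              \
 *		}
 *
 *	NIX_TX_FASTPATH_MODES_0_15
 *	#undef T
 */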

#define NIX_TX_FASTPATH_MODES_16_31                                            \
	T(tso, 6, TSO_F)                                                       \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff, 6, TSO_F | NOFF_F)                                         \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
	T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_32_47                                            \
	T(ts, 8, TSP_F)                                                        \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff, 8, TSP_F | NOFF_F)                                          \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
	T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_48_63                                            \
	T(ts_tso, 8, TSP_F | TSO_F)                                            \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
	T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
	T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
	T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_64_79                                            \
	T(sec, 4, T_SEC_F)                                                     \
	T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                               \
	T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                           \
	T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff, 4, T_SEC_F | NOFF_F)                                       \
	T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
	T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
	T(sec_noff_ol3ol4csum_l3l4csum, 4,                                     \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
	T(sec_noff_vlan_ol3ol4csum, 6,                                         \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_80_95 \
        T(sec_tso, 6, T_SEC_F | TSO_F) \
        T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F) \
        T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F) \
        T(sec_tso_ol3ol4csum_l3l4csum, 6, \
                T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F) \
        T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F) \
        T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6, \
                T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F) \
        T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F) \
        T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
        T(sec_tso_noff_ol3ol4csum_l3l4csum, 6, \
                T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F) \
        T(sec_tso_noff_vlan_l3l4csum, 6, \
                T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
        T(sec_tso_noff_vlan_ol3ol4csum, 6, \
                T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \
                T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_96_111 \
        T(sec_ts, 8, T_SEC_F | TSP_F) \
        T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F) \
        T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F) \
        T(sec_ts_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F) \
        T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F) \
        T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F) \
        T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F) \
        T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \
        T(sec_ts_noff_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F) \
        T(sec_ts_noff_vlan_l3l4csum, 8, \
                T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
        T(sec_ts_noff_vlan_ol3ol4csum, 8, \
                T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_112_127 \
        T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F) \
        T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F) \
        T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F) \
        T(sec_ts_tso_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F) \
        T(sec_ts_tso_vlan_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
        T(sec_ts_tso_vlan_ol3ol4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F) \
        T(sec_ts_tso_noff_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
        T(sec_ts_tso_noff_ol3ol4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
        T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
        T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \
        T(sec_ts_tso_noff_vlan_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
        T(sec_ts_tso_noff_vlan_ol3ol4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \
                T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
                L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES \
        NIX_TX_FASTPATH_MODES_0_15 \
        NIX_TX_FASTPATH_MODES_16_31 \
        NIX_TX_FASTPATH_MODES_32_47 \
        NIX_TX_FASTPATH_MODES_48_63 \
        NIX_TX_FASTPATH_MODES_64_79 \
        NIX_TX_FASTPATH_MODES_80_95 \
        NIX_TX_FASTPATH_MODES_96_111 \
        NIX_TX_FASTPATH_MODES_112_127
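
/* Illustrative only: a consumer of the X-macro list above can define T() to
 * build a dispatch table indexed by the offload flag mask, assuming the
 * short flag names (T_SEC_F, TSP_F, TSO_F, NOFF_F, VLAN_F, OL3OL4CSUM_F,
 * L3L4CSUM_F) alias the corresponding NIX_TX_OFFLOAD_* bits:
 *
 *   #define T(name, sz, flags) [flags] = cn9k_nix_xmit_pkts_##name,
 *   static const eth_tx_burst_t tx_burst_tbl[NIX_TX_OFFLOAD_MAX] = {
 *           NIX_TX_FASTPATH_MODES
 *   };
 *   #undef T
 */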

#define T(name, sz, flags) \
        uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
        uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
        uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
        uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
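
/* Expanding NIX_TX_FASTPATH_MODES with the T() definition above declares
 * four flavors of every Tx function: scalar, scalar multi-segment (mseg),
 * vector (vec) and vector multi-segment (vec_mseg).
 */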
NIX_TX_FASTPATH_MODES
#undef T

#define NIX_TX_XMIT(fn, sz, flags) \
        uint16_t __rte_noinline __rte_hot fn( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
        { \
                uint64_t cmd[sz]; \
                /* For TSO, inner checksum is a must */ \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
                        return 0; \
                return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \
                                          flags); \
        }
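
/* Illustrative, hypothetical instantiation in a source file: pairing T()
 * with the NIX_TX_XMIT* wrappers defines every function declared above,
 * e.g. for the scalar single-segment path:
 *
 *   #define T(name, sz, flags) \
 *           NIX_TX_XMIT(cn9k_nix_xmit_pkts_##name, sz, flags)
 *   NIX_TX_FASTPATH_MODES
 *   #undef T
 */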

#define NIX_TX_XMIT_MSEG(fn, sz, flags) \
        uint16_t __rte_noinline __rte_hot fn( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
        { \
                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
                /* For TSO, inner checksum is a must */ \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
                        return 0; \
                return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
                                               (flags) | NIX_TX_MULTI_SEG_F); \
        }

#define NIX_TX_XMIT_VEC(fn, sz, flags) \
        uint16_t __rte_noinline __rte_hot fn( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
        { \
                uint64_t cmd[sz]; \
                /* For TSO, inner checksum is a must */ \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
                        return 0; \
                return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
                                                 (flags)); \
        }

#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags) \
        uint16_t __rte_noinline __rte_hot fn( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \
        { \
                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
                /* For TSO, inner checksum is a must */ \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
                        return 0; \
                return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
                                                 (flags) | NIX_TX_MULTI_SEG_F); \
        }
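
/* Note: the vector multi-segment wrapper reuses cn9k_nix_xmit_pkts_vector();
 * multi-segment handling is selected purely by OR'ing NIX_TX_MULTI_SEG_F
 * into the compile-time flags.
 */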

#endif /* __CN9K_TX_H__ */