/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)
/* Flags to control xmit_prepare function.
 * Defined from the MSB end to denote that they are
 * not used as offload flags when picking the Tx function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update the fc_cache_pkts */            \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply with sqe_per_sqb to express in pkts */     \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check it again for the room */                      \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)

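/*
 * Worked example for the refill above (illustrative numbers, not from the
 * original source): with nb_sqb_bufs_adj = 512, *fc_mem = 480 and
 * sqes_per_sqb_log2 = 5, the refill computes (512 - 480) << 5 = 1024, i.e.
 * the 32 free SQBs can hold 1024 more SQEs (packets), which becomes the new
 * cached count.
 */
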
/* Encoded-number-of-segments to number-of-dwords macro; each value of
 * nb_segs is encoded as a 4-bit nibble.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

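/*
 * Worked example (derived from the constant, not from the original source):
 * NIX_NB_SEGS_TO_SEGDW(3) reads nibble 3 of the magic constant,
 * (0x76654432210ULL >> 12) & 0xF = 2, i.e. one SG subdescriptor word plus
 * three pointer words rounds up to two 16B dwords. Each nibble n thus holds
 * the SG-list size, in 16B dwords, for n segments.
 */
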
/* Function to determine the number of Tx subdescriptors required when the
 * extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                             2 :
                             ((flags &
                         (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
                                      1 :
                                      0);
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Everything is already accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                             ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                             8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

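/*
 * A note on the STEOR data word built above (a sketch, per the usage in
 * cn10k_nix_xmit_pkts() below): bits [2:0] hold the first LMT line's size
 * and are moved into bits [6:4] of the I/O address by the caller, while the
 * 3-bit fields at bits 19, 22, ..., 61 hold the sizes of lines 1..15. Each
 * field encodes (line size in 16B dwords) - 1; the caller then adds the
 * (lines - 1) count at bits [15:12] and the LMT ID in the low bits before
 * issuing the LMTST.
 */
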
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                              (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
                              4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags, const uint16_t static_sz)
{
        if (static_sz)
                cmd[0] = txq->send_hdr_w0;
        else
                cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                        cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
                else
                        cmd[2] = NIX_SUBDC_EXT << 60;
                cmd[3] = 0;
                cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        } else {
                cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
}

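/*
 * Layout produced above (a sketch; one 8B word per cmd[] entry): cmd[0..1]
 * hold the SEND_HDR subdescriptor. With NIX_TX_NEED_EXT_HDR, cmd[2..3] hold
 * SEND_EXT and cmd[4] the SG word; otherwise the SG word sits at cmd[2].
 * BIT_ULL(48) presets the SG segment count to one as the single-segment
 * template; per-packet fields are filled in later by
 * cn10k_nix_xmit_prepare().
 */
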
static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update lmtline use for partial end line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e. 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Update lines - 1 that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}

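/*
 * Sketch of the accounting above, assuming ROC_CN10K_TWO_CPT_INST_DW_M1
 * encodes a full 128B line as (128B / 16B) - 1 = 7: lnum counts full LMT
 * lines holding two 64B CPT instructions each, loff marks a trailing half
 * line, and the half line's 3-bit size field is rewritten to 0x3, i.e.
 * (64B / 16B) - 1.
 */
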
#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint64x2_t cmd01, cmd23;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = vgetq_lane_u8(*cmd0, 8);
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }

        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = cmd[1] & 0xFF;
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}

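/*
 * Worked example for the header fixups above (illustrative numbers, not from
 * the original source): a plain TCP packet with l2_len = 14, l3_len = 20,
 * l4_len = 20 and pkt_len = 1514 gives lso_sb = 54 and paylen = 1460. The
 * IPv4 total-length field at mdata + 14 + 2 is then reduced by 1460, leaving
 * a header-only length for hardware to extend per emitted segment.
 */
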
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
                       uint64_t mark_fmt)
{
        uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;
        uint16_t mark_form = 0;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w0.mark_en = 0;
                send_hdr_ext->w1.u = 0;
                ol_flags = m->ol_flags;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment by 1 for IPv4 as type 3 is IPv4 with csum */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* In case there is no tunnel header, shift the inner
                 * IL3/IL4 fields down into the OL3/OL4 positions so the
                 * single header uses the outer checksum fields (pointers
                 * are 8-bit fields, types 4-bit, hence the >> 16 and >> 8).
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment by 1 for IPv4 as type 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment by 1 for IPv4 as type 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
                                                  RTE_MBUF_F_TX_IPV6));

                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
                /* Fill for VLAN marking only when VLAN insertion enabled */
                mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
                             (send_hdr_ext->w1.vlan1_ins_ena ||
                              send_hdr_ext->w1.vlan0_ins_ena));

                /* Mask requested flags with packet data information */
                mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
                mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

                mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
                mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
                markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

                send_hdr_ext->w0.mark_en = !!mark_off;
                send_hdr_ext->w0.markform = mark_form & 0x7F;
                send_hdr_ext->w0.markptr = markptr;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be on the stack. The intent
         * is that 'cmd' stores the content from txq->cmd, which is copied
         * only once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp =
                        !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                /* For packets without RTE_MBUF_F_TX_IEEE1588_TMST set, the Tx
                 * tstamp must not be recorded. Hence change the alg type to
                 * NIX_SENDMEMALG_SUB and redirect the send mem addr field to
                 * the next 8 bytes so that the write does not corrupt the
                 * registered Tx tstamp address.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg =
                        NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
        }
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}

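/*
 * Worked example for the segdw computation above (illustrative): a
 * four-segment packet with the extended header (off = 2) consumes six 8B
 * words for the SG list (SG hdr + 3 ptrs + SG hdr + 1 ptr), which rounds up
 * to three 16B dwords; adding (off >> 1) + 1 = 2 for the SEND_HDR and
 * SEND_EXT subdescriptors gives segdw = 5 and sizem1 = 4.
 */
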
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

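/*
 * Usage sketch (not part of this header): these routines are templates and
 * are expected to be instantiated with a compile-time constant flag set so
 * that dead branches fold away. A hypothetical instantiation, with names
 * invented for illustration:
 *
 *	static uint16_t
 *	my_xmit_csum(void *txq, struct rte_mbuf **pkts, uint16_t nb)
 *	{
 *		uint64_t cmd[8];
 *
 *		// ws may be NULL since NIX_TX_VWQE_F is not set here
 *		return cn10k_nix_xmit_pkts(txq, NULL, pkts, nb, cmd,
 *					   NIX_TX_OFFLOAD_L3_L4_CSUM_F);
 *	}
 */
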
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
                         struct rte_mbuf **tx_pkts, uint16_t pkts,
                         uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              segdw, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

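/*
 * Note on data128 in the multi-seg path above: per-line sizes cannot be
 * precomputed since each packet's segdw varies, so (segdw - 1) is packed
 * into 3-bit fields starting at bit 16 of a 128-bit accumulator, advancing
 * 3 bits per LMT line. The low and high 64-bit halves then feed STEOR0 and
 * STEOR1 in the same layout as the fixed pattern from
 * cn10k_nix_tx_steor_data().
 */
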
#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                      const uint64_t flags, const uint64_t lso_tun_fmt)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                        0x1;
                uint8_t shift = is_udp_tun ? 32 : 0;

                shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format = (lso_tun_fmt >> shift);
        }
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                union nix_send_hdr_w0_u *sh,
                                union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                           uint64x2_t *cmd1, const uint8_t segdw,
                           const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return;
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        sh.sizem1 = segdw - 1;
        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}

#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
                               uint64x2_t *cmd1, uint64x2_t *cmd2,
                               uint64x2_t *cmd3, uint8_t *segdw,
                               uint64_t *lmt_addr, __uint128_t *data128,
                               uint8_t *shift, const uint16_t flags)
{
        uint8_t j, off, lmt_used;

        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                /* No segments in 4 consecutive packets. */
                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
                        for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                        vst1q_u64(lmt_addr, cmd0[0]);
                        vst1q_u64(lmt_addr + 2, cmd1[0]);
                        vst1q_u64(lmt_addr + 4, cmd0[1]);
                        vst1q_u64(lmt_addr + 6, cmd1[1]);
                        vst1q_u64(lmt_addr + 8, cmd0[2]);
                        vst1q_u64(lmt_addr + 10, cmd1[2]);
                        vst1q_u64(lmt_addr + 12, cmd0[3]);
                        vst1q_u64(lmt_addr + 14, cmd1[3]);

                        *data128 |= ((__uint128_t)7) << *shift;
                        *shift += 3;

                        return 1;
                }
        }

        lmt_used = 0;
        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
                /* Fit consecutive packets in same LMTLINE. */
                if ((segdw[j] + segdw[j + 1]) <= 8) {
                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                /* TSTAMP takes 4 each, no segs. */
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                vst1q_u64(lmt_addr + 6, cmd3[j]);

                                vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
                                vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                /* EXT header takes 3 each, space for 2 segs. */
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                off = segdw[j] - 3;
                                off <<= 1;
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
                                                           lmt_addr + 12 + off,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
                        } else {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 4,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                off = segdw[j] - 2;
                                off <<= 1;
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
                                                           lmt_addr + 8 + off,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
                        }
                        *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
                                    << *shift;
                        *shift += 3;
                        j += 2;
                } else {
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                off = segdw[j] - 4;
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                        } else {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 4,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
1406                                 vst1q_u64(lmt_addr, cmd0[j]);
1407                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1408                         }
1409                         *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1410                         *shift += 3;
1411                         j++;
1412                 }
1413                 lmt_used++;
1414                 lmt_addr += 16;
1415         }
1416
1417         return lmt_used;
1418 }
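/*
 * Note on the LMTST data word built above: each LMT line consumed gets a
 * 3-bit field in *data128 holding (16B data words - 1). E.g. a full
 * 8-dword line yields the field value 8 - 1 = 7, as in the
 * ((__uint128_t)7) << *shift case in the 4-packets-per-line path, and
 * *shift advances by 3 for the next line's field.
 */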
1419
1420 static __rte_always_inline void
1421 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1422                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1423 {
1424         /* Go to next line if we are out of space */
1425         if ((*loff + (dw << 4)) > 128) {
1426                 *data128 = *data128 |
1427                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1428                 *shift = *shift + 3;
1429                 *loff = 0;
1430                 *lnum = *lnum + 1;
1431         }
1432
1433         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1434         *loff = *loff + (dw << 4);
1435 }
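/*
 * Example of the rollover above, with 128B LMT lines: at *loff = 112, a
 * descriptor of dw = 2 (2 << 4 = 32 bytes) no longer fits since
 * 112 + 32 > 128, so the current line's (112 >> 4) - 1 = 6 data words are
 * recorded in *data128 and the descriptor starts on the next line.
 */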
1436
1437 static __rte_always_inline void
1438 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1439                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1440                      uint64x2_t cmd3, const uint16_t flags)
1441 {
1442         uint8_t off;
1443
1444         /* Handle the no-fast-free case when security is enabled without mseg */
1445         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1446             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1447             !(flags & NIX_TX_MULTI_SEG_F)) {
1448                 union nix_send_sg_s sg;
1449
1450                 sg.u = vgetq_lane_u64(cmd1, 0);
1451                 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1452                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1453
1454 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1455                 sg.u = vgetq_lane_u64(cmd1, 0);
1456                 if (!(sg.u & (1ULL << 55)))
1457                         RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1458                                                 0);
1459                 rte_io_wmb();
1460 #endif
1461         }
1462         if (flags & NIX_TX_MULTI_SEG_F) {
1463                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1464                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1465                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1466                                                    &cmd0, &cmd1, segdw, flags);
1467                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1468                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1469                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1470                         off = segdw - 4;
1471                         off <<= 4;
1472                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1473                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1474                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1475                                                    &cmd0, &cmd1, segdw, flags);
1476                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1477                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1478                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1479                 } else {
1480                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1481                                                    &cmd0, &cmd1, segdw, flags);
1482                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1483                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1484                 }
1485         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1486                 /* Store the prepared send desc to LMT lines */
1487                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1488                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1489                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1490                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1491                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1492                 } else {
1493                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1494                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1495                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1496                 }
1497         } else {
1498                 /* Store the prepared send desc to LMT lines */
1499                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1500                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1501         }
1502 }
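/*
 * Single-seg LMT line layout produced above (byte offsets within a line):
 * SEND_HDR at 0, SEND_EXT (when enabled) at 16, SEND_SG at 32 with an EXT
 * header (else at 16), and SEND_MEM for TSTAMP at 48. The multi-seg paths
 * instead let cn10k_nix_prepare_mseg_vec() append the SG list after those
 * words.
 */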
1503
1504 static __rte_always_inline uint16_t
1505 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1506                            struct rte_mbuf **tx_pkts, uint16_t pkts,
1507                            uint64_t *cmd, const uint16_t flags)
1508 {
1509         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1510         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1511         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1512                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1513         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1514         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1515         uint64x2_t senddesc01_w0, senddesc23_w0;
1516         uint64x2_t senddesc01_w1, senddesc23_w1;
1517         uint64x2_t sendext01_w0, sendext23_w0;
1518         uint64x2_t sendext01_w1, sendext23_w1;
1519         uint64x2_t sendmem01_w0, sendmem23_w0;
1520         uint64x2_t sendmem01_w1, sendmem23_w1;
1521         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1522         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1523         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1524         struct cn10k_eth_txq *txq = tx_queue;
1525         rte_iova_t io_addr = txq->io_addr;
1526         uintptr_t laddr = txq->lmt_base;
1527         uint8_t c_lnum, c_shft, c_loff;
1528         uint64x2_t ltypes01, ltypes23;
1529         uint64x2_t xtmp128, ytmp128;
1530         uint64x2_t xmask01, xmask23;
1531         uintptr_t c_laddr = laddr;
1532         uint8_t lnum, shift, loff;
1533         rte_iova_t c_io_addr;
1534         uint64_t sa_base;
1535         union wdata {
1536                 __uint128_t data128;
1537                 uint64_t data[2];
1538         } wd;
1539
1540         if (!(flags & NIX_TX_VWQE_F)) {
1541                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1542                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1543                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1544                 /* Reduce the cached count */
1545                 txq->fc_cache_pkts -= pkts;
1546         } else {
1547                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1548                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1549         }
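        /*
         * E.g. pkts = 23 leaves scalar = 23 & 3 = 3 packets for the later
         * scalar tail and trims the vector portion to
         * RTE_ALIGN_FLOOR(23, 4) = 20, with NIX_DESCS_PER_LOOP = 4 packets
         * handled per vector iteration.
         */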
1550
1551         /* Perform header writes before barrier for TSO */
1552         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1553                 for (i = 0; i < pkts; i++)
1554                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1555         }
1556
1557         if (!(flags & NIX_TX_VWQE_F)) {
1558                 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1559         } else {
1560                 uint64_t w0 =
1561                         (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1562                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1563
1564                 senddesc01_w0 = vdupq_n_u64(w0);
1565         }
1566         senddesc23_w0 = senddesc01_w0;
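        /*
         * Illustration of the VWQE W0 above: the top 20 bits of
         * send_hdr_w0 are retained and sizem1 is re-derived at bit 40,
         * e.g. with TSTAMP enabled cn10k_nix_tx_ext_subs() = 2, so
         * sizem1 = 3 (HDR + EXT + SG + MEM, minus one).
         */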
1567
1568         senddesc01_w1 = vdupq_n_u64(0);
1569         senddesc23_w1 = senddesc01_w1;
1570         sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1571         sgdesc23_w0 = sgdesc01_w0;
1572
1573         if (flags & NIX_TX_NEED_EXT_HDR) {
1574                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1575                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1576                                                    BIT_ULL(15));
1577                         sendmem01_w0 =
1578                                 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1579                                             (NIX_SENDMEMALG_SETTSTMP << 56));
1580                         sendmem23_w0 = sendmem01_w0;
1581                         sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1582                         sendmem23_w1 = sendmem01_w1;
1583                 } else {
1584                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1585                 }
1586                 sendext23_w0 = sendext01_w0;
1587
1588                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1589                         sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1590                 else
1591                         sendext01_w1 = vdupq_n_u64(0);
1592                 sendext23_w1 = sendext01_w1;
1593         }
1594
1595         /* Get LMT base address and LMT ID as lcore id */
1596         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1597         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1598                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1599                 c_io_addr = txq->cpt_io_addr;
1600                 sa_base = txq->sa_base;
1601         }
1602
1603         left = pkts;
1604 again:
1605         /* Number of packets to prepare depends on offloads enabled. */
1606         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1607                               cn10k_nix_pkts_per_vec_brst(flags) :
1608                               left;
1609         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1610                 wd.data128 = 0;
1611                 shift = 16;
1612         }
1613         lnum = 0;
1614         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1615                 loff = 0;
1616                 c_loff = 0;
1617                 c_lnum = 0;
1618                 c_shft = 16;
1619         }
1620
1621         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1622                 if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && c_lnum + 2 > 16) {
1623                         burst = i;
1624                         break;
1625                 }
1626
1627                 if (flags & NIX_TX_MULTI_SEG_F) {
1628                         uint8_t j;
1629
1630                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1631                                 struct rte_mbuf *m = tx_pkts[j];
1632
1633                                 /* Get dwords based on nb_segs. */
1634                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1635                                 /* Add dwords based on offloads. */
1636                                 segdw[j] += 1 + /* SEND HDR */
1637                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1638                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1639                         }
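                        /*
                         * NIX_NB_SEGS_TO_SEGDW() is a nibble lookup into
                         * NIX_SEGDW_MAGIC giving the 16B data words needed
                         * for the SG list, e.g. for nb_segs = 5:
                         * (0x76654432210ULL >> (5 << 2)) & 0xF = 4, i.e.
                         * two SG words plus five IOVA pointers (56B)
                         * rounded up to four 16B units, before the
                         * HDR/EXT/TSTAMP words added above.
                         */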
1640
1641                         /* Check if there are enough LMTLINES for this loop */
1642                         if (lnum + 4 > 32) {
1643                                 uint8_t ldwords_con = 0, lneeded = 0;
1644                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1645                                         ldwords_con += segdw[j];
1646                                         if (ldwords_con > 8) {
1647                                                 lneeded += 1;
1648                                                 ldwords_con = segdw[j];
1649                                         }
1650                                 }
1651                                 lneeded += 1;
1652                                 if (lnum + lneeded > 32) {
1653                                         burst = i;
1654                                         break;
1655                                 }
1656                         }
1657                 }
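                /*
                 * The check above greedily packs consecutive packets into
                 * 8-dword LMT lines, e.g. segdw = {5, 5, 3, 3} packs as
                 * [5], [5, 3], [3] and needs lneeded = 3 lines, so the
                 * burst is trimmed when not enough lines remain.
                 */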
1658                 /* Clear lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
1659                 senddesc01_w0 =
1660                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1661                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1662
1663                 senddesc23_w0 = senddesc01_w0;
1664                 sgdesc23_w0 = sgdesc01_w0;
1665
1666                 /* Clear vlan enables. */
1667                 if (flags & NIX_TX_NEED_EXT_HDR) {
1668                         sendext01_w1 = vbicq_u64(sendext01_w1,
1669                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1670                         sendext23_w1 = sendext01_w1;
1671                 }
1672
1673                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1674                         /* Reset send mem alg to SETTSTMP from SUB */
1675                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1676                                                  vdupq_n_u64(BIT_ULL(59)));
1677                         /* Reset send mem address to default. */
1678                         sendmem01_w1 =
1679                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1680                         sendmem23_w0 = sendmem01_w0;
1681                         sendmem23_w1 = sendmem01_w1;
1682                 }
1683
1684                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1685                         /* Clear the LSO enable bit. */
1686                         sendext01_w0 = vbicq_u64(sendext01_w0,
1687                                                  vdupq_n_u64(BIT_ULL(14)));
1688                         sendext23_w0 = sendext01_w0;
1689                 }
1690
1691                 /* Move mbufs to iova */
1692                 mbuf0 = (uint64_t *)tx_pkts[0];
1693                 mbuf1 = (uint64_t *)tx_pkts[1];
1694                 mbuf2 = (uint64_t *)tx_pkts[2];
1695                 mbuf3 = (uint64_t *)tx_pkts[3];
1696
1697                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1698                                      offsetof(struct rte_mbuf, buf_iova));
1699                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1700                                      offsetof(struct rte_mbuf, buf_iova));
1701                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1702                                      offsetof(struct rte_mbuf, buf_iova));
1703                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1704                                      offsetof(struct rte_mbuf, buf_iova));
1705                 /*
1706                  * Get each mbuf's olflags, iova, pktlen, dataoff:
1707                  * dataoff_iovaX.D[0] = iova,
1708                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1709                  * len_olflagsX.D[0] = ol_flags,
1710                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1711                  */
1712                 dataoff_iova0 = vld1q_u64(mbuf0);
1713                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1714                 dataoff_iova1 = vld1q_u64(mbuf1);
1715                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1716                 dataoff_iova2 = vld1q_u64(mbuf2);
1717                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1718                 dataoff_iova3 = vld1q_u64(mbuf3);
1719                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1720
1721                 /* Move mbufs to point to pool */
1722                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1723                                      offsetof(struct rte_mbuf, pool) -
1724                                      offsetof(struct rte_mbuf, buf_iova));
1725                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1726                                      offsetof(struct rte_mbuf, pool) -
1727                                      offsetof(struct rte_mbuf, buf_iova));
1728                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1729                                      offsetof(struct rte_mbuf, pool) -
1730                                      offsetof(struct rte_mbuf, buf_iova));
1731                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1732                                      offsetof(struct rte_mbuf, pool) -
1733                                      offsetof(struct rte_mbuf, buf_iova));
1734
1735                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1736                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1737                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1738                         /*
1739                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1740                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1741                          */
1742
1743                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1744                                      : [a] "+w"(senddesc01_w1)
1745                                      : [in] "r"(mbuf0 + 2)
1746                                      : "memory");
1747
1748                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1749                                      : [a] "+w"(senddesc01_w1)
1750                                      : [in] "r"(mbuf1 + 2)
1751                                      : "memory");
1752
1753                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1754                                      : [b] "+w"(senddesc23_w1)
1755                                      : [in] "r"(mbuf2 + 2)
1756                                      : "memory");
1757
1758                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1759                                      : [b] "+w"(senddesc23_w1)
1760                                      : [in] "r"(mbuf3 + 2)
1761                                      : "memory");
1762
1763                         /* Get pool pointer alone */
1764                         mbuf0 = (uint64_t *)*mbuf0;
1765                         mbuf1 = (uint64_t *)*mbuf1;
1766                         mbuf2 = (uint64_t *)*mbuf2;
1767                         mbuf3 = (uint64_t *)*mbuf3;
1768                 } else {
1769                         /* Get pool pointer alone */
1770                         mbuf0 = (uint64_t *)*mbuf0;
1771                         mbuf1 = (uint64_t *)*mbuf1;
1772                         mbuf2 = (uint64_t *)*mbuf2;
1773                         mbuf3 = (uint64_t *)*mbuf3;
1774                 }
1775
1776                 const uint8x16_t shuf_mask2 = {
1777                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1778                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1779                 };
1780                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1781                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1782
1783                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1784                 const uint64x2_t and_mask0 = {
1785                         0xFFFFFFFFFFFFFFFF,
1786                         0x000000000000FFFF,
1787                 };
1788
1789                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1790                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1791                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1792                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1793
1794                 /*
1795                  * Pick only 16 bits of pktlen present at bits 63:32
1796                  * and place them at bits 15:0.
1797                  */
1798                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1799                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1800
1801                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1802                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1803                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1804
1805                 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1806                  * pktlen at 15:0 position.
1807                  */
1808                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1809                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1810                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1811                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
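                /*
                 * Illustration of the shuffle above: pkt_len occupies
                 * bytes 4-7 within each 64-bit lane after vzip2q_u64(),
                 * so shuf_mask2 copies bytes {0x4, 0x5} (resp. {0xc, 0xd})
                 * into bytes {0, 1} of each lane and zeroes the rest
                 * (table index 0xFF reads as zero).
                 */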
1812
1813                 /* Move mbuf to point to pool_id. */
1814                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1815                                      offsetof(struct rte_mempool, pool_id));
1816                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1817                                      offsetof(struct rte_mempool, pool_id));
1818                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1819                                      offsetof(struct rte_mempool, pool_id));
1820                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1821                                      offsetof(struct rte_mempool, pool_id));
1822
1823                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1824                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1825                         /*
1826                          * Lookup table to translate ol_flags to
1827                          * il3/il4 types. But we still use ol3/ol4 types in
1828                          * senddesc_w1 as only single-header processing is enabled.
1829                          */
1830                         const uint8x16_t tbl = {
1831                                 /* [0-15] = il4type:il3type */
1832                                 0x04, /* none (IPv6 assumed) */
1833                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1834                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1835                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1836                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1837                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1838                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1839                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1840                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1841                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1842                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1843                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1844                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1845                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1846                                        * RTE_MBUF_F_TX_TCP_CKSUM
1847                                        */
1848                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1849                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1850                                        */
1851                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1852                                        * RTE_MBUF_F_TX_UDP_CKSUM
1853                                        */
1854                         };
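                        /*
                         * Index derivation for the table above, from
                         * ol_flags bits 55:52 shifted into the low nibble:
                         * [IPV4, IP_CKSUM, L4_TYPE(1:0)]. E.g.
                         * RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                         * RTE_MBUF_F_TX_TCP_CKSUM gives index
                         * 0b1101 = 13 -> 0x13 (il4type TCP, il3type IPv4
                         * with checksum).
                         */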
1855
1856                         /* Extract olflags to translate to iltypes */
1857                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1858                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1859
1860                         /*
1861                          * E(47):L3_LEN(9):L2_LEN(7+z)
1862                          * E(47):L3_LEN(9):L2_LEN(7+z)
1863                          */
1864                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1865                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1866
1867                         /* Move OLFLAGS bits 55:52 to 51:48
1868                          * with zeros prepended on the byte; the rest
1869                          * are don't-care
1870                          */
1871                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1872                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1873                         /*
1874                          * E(48):L3_LEN(8):L2_LEN(z+7)
1875                          * E(48):L3_LEN(8):L2_LEN(z+7)
1876                          */
1877                         const int8x16_t tshft3 = {
1878                                 -1, 0, 8, 8, 8, 8, 8, 8,
1879                                 -1, 0, 8, 8, 8, 8, 8, 8,
1880                         };
1881
1882                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1883                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1884
1885                         /* Do the lookup */
1886                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1887                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1888
1889                         /* Pick only the relevant fields, i.e. bits 48:55
1890                          * of iltype, and place them in ol3/ol4type of senddesc_w1
1891                          */
1892                         const uint8x16_t shuf_mask0 = {
1893                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1894                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1895                         };
1896
1897                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1898                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1899
1900                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1901                          * a [E(32):E(16):OL3(8):OL2(8)]
1902                          * a = a + (a << 8)
1903                          * a [E(32):E(16):(OL3+OL2):OL2]
1904                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1905                          */
1906                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1907                                                  vshlq_n_u16(senddesc01_w1, 8));
1908                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1909                                                  vshlq_n_u16(senddesc23_w1, 8));
1910
1911                         /* Move ltypes to senddesc*_w1 */
1912                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1913                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1914                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1915                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1916                         /*
1917                          * Lookup table to translate ol_flags to
1918                          * ol3/ol4 types.
1919                          */
1920
1921                         const uint8x16_t tbl = {
1922                                 /* [0-15] = ol4type:ol3type */
1923                                 0x00, /* none */
1924                                 0x03, /* OUTER_IP_CKSUM */
1925                                 0x02, /* OUTER_IPV4 */
1926                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1927                                 0x04, /* OUTER_IPV6 */
1928                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1929                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1930                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1931                                        * OUTER_IP_CKSUM
1932                                        */
1933                                 0x00, /* OUTER_UDP_CKSUM */
1934                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1935                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1936                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1937                                        * OUTER_IP_CKSUM
1938                                        */
1939                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1940                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1941                                        * OUTER_IP_CKSUM
1942                                        */
1943                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1944                                        * OUTER_IPV4
1945                                        */
1946                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1947                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1948                                        */
1949                         };
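                        /*
                         * Index derivation for the table above: once
                         * OUTER_UDP_CKSUM (bit 41) is copied up to bit 61
                         * and the nibble shifted down, the index reads
                         * [UDP_CKSUM, IPV6, IPV4, IP_CKSUM]. E.g.
                         * RTE_MBUF_F_TX_OUTER_IPV4 |
                         * RTE_MBUF_F_TX_OUTER_IP_CKSUM gives index
                         * 0b0011 = 3 -> 0x03 (ol4type none, ol3type IPv4
                         * with checksum).
                         */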
1950
1951                         /* Extract olflags to translate to iltypes */
1952                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1953                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1954
1955                         /*
1956                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1957                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1958                          */
1959                         const uint8x16_t shuf_mask5 = {
1960                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1961                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1962                         };
1963                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1964                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1965
1966                         /* Extract outer ol flags only */
1967                         const uint64x2_t o_cksum_mask = {
1968                                 0x1C00020000000000,
1969                                 0x1C00020000000000,
1970                         };
1971
1972                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1973                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1974
1975                         /* Extract OUTER_UDP_CKSUM bit 41 and
1976                          * move it to bit 61
1977                          */
1978
1979                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1980                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1981
1982                         /* Shift oltype by 2 to start nibble from BIT(56)
1983                          * instead of BIT(58)
1984                          */
1985                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1986                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1987                         /*
1988                          * E(48):L3_LEN(8):L2_LEN(z+7)
1989                          * E(48):L3_LEN(8):L2_LEN(z+7)
1990                          */
1991                         const int8x16_t tshft3 = {
1992                                 -1, 0, 8, 8, 8, 8, 8, 8,
1993                                 -1, 0, 8, 8, 8, 8, 8, 8,
1994                         };
1995
1996                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1997                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1998
1999                         /* Do the lookup */
2000                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2001                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2002
2003                         /* Pick only the relevant fields, i.e. bits 56:63
2004                          * of oltype, and place them in ol3/ol4type of senddesc_w1
2005                          */
2006                         const uint8x16_t shuf_mask0 = {
2007                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2008                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2009                         };
2010
2011                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2012                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2013
2014                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
2015                          * a [E(32):E(16):OL3(8):OL2(8)]
2016                          * a = a + (a << 8)
2017                          * a [E(32):E(16):(OL3+OL2):OL2]
2018                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
2019                          */
2020                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2021                                                  vshlq_n_u16(senddesc01_w1, 8));
2022                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2023                                                  vshlq_n_u16(senddesc23_w1, 8));
2024
2025                         /* Move ltypes to senddesc*_w1 */
2026                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2027                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2028                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2029                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2030                         /* Lookup table to translate ol_flags to
2031                          * ol4type, ol3type, il4type, il3type of senddesc_w1
2032                          */
2033                         const uint8x16x2_t tbl = {{
2034                                 {
2035                                         /* [0-15] = il4type:il3type */
2036                                         0x04, /* none (IPv6) */
2037                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
2038                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
2039                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
2040                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
2041                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
2042                                                * RTE_MBUF_F_TX_TCP_CKSUM
2043                                                */
2044                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2045                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2046                                                */
2047                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2048                                                * RTE_MBUF_F_TX_UDP_CKSUM
2049                                                */
2050                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
2051                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
2052                                                * RTE_MBUF_F_TX_TCP_CKSUM
2053                                                */
2054                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
2055                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2056                                                */
2057                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
2058                                                * RTE_MBUF_F_TX_UDP_CKSUM
2059                                                */
2060                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
2061                                                * RTE_MBUF_F_TX_IP_CKSUM
2062                                                */
2063                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2064                                                * RTE_MBUF_F_TX_TCP_CKSUM
2065                                                */
2066                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2067                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2068                                                */
2069                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2070                                                * RTE_MBUF_F_TX_UDP_CKSUM
2071                                                */
2072                                 },
2073
2074                                 {
2075                                         /* [16-31] = ol4type:ol3type */
2076                                         0x00, /* none */
2077                                         0x03, /* OUTER_IP_CKSUM */
2078                                         0x02, /* OUTER_IPV4 */
2079                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2080                                         0x04, /* OUTER_IPV6 */
2081                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2082                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2083                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2084                                                * OUTER_IP_CKSUM
2085                                                */
2086                                         0x00, /* OUTER_UDP_CKSUM */
2087                                         0x33, /* OUTER_UDP_CKSUM |
2088                                                * OUTER_IP_CKSUM
2089                                                */
2090                                         0x32, /* OUTER_UDP_CKSUM |
2091                                                * OUTER_IPV4
2092                                                */
2093                                         0x33, /* OUTER_UDP_CKSUM |
2094                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2095                                                */
2096                                         0x34, /* OUTER_UDP_CKSUM |
2097                                                * OUTER_IPV6
2098                                                */
2099                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2100                                                * OUTER_IP_CKSUM
2101                                                */
2102                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2103                                                * OUTER_IPV4
2104                                                */
2105                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2106                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2107                                                */
2108                                 },
2109                         }};
2110
2111                         /* Extract olflags to translate to oltype & iltype */
2112                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2113                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2114
2115                         /*
2116                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2117                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2118                          */
2119                         const uint32x4_t tshft_4 = {
2120                                 1,
2121                                 0,
2122                                 1,
2123                                 0,
2124                         };
2125                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2126                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2127
2128                         /*
2129                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2130                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2131                          */
2132                         const uint8x16_t shuf_mask5 = {
2133                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2134                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2135                         };
2136                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2137                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2138
2139                         /* Extract outer and inner header ol_flags */
2140                         const uint64x2_t oi_cksum_mask = {
2141                                 0x1CF0020000000000,
2142                                 0x1CF0020000000000,
2143                         };
2144
2145                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2146                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2147
2148                         /* Extract OUTER_UDP_CKSUM bit 41 and
2149                          * move it to bit 61
2150                          */
2151
2152                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2153                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2154
2155                         /* Shift right oltype by 2 and iltype by 4
2156                          * to start oltype nibble from BIT(56)
2157                          * instead of BIT(58) and iltype nibble from BIT(48)
2158                          * instead of BIT(52).
2159                          */
2160                         const int8x16_t tshft5 = {
2161                                 8, 8, 8, 8, 8, 8, -4, -2,
2162                                 8, 8, 8, 8, 8, 8, -4, -2,
2163                         };
2164
2165                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2166                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2167                         /*
2168                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2169                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2170                          */
2171                         const int8x16_t tshft3 = {
2172                                 -1, 0, -1, 0, 0, 0, 0, 0,
2173                                 -1, 0, -1, 0, 0, 0, 0, 0,
2174                         };
2175
2176                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2177                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2178
2179                         /* Mark Bit(4) of oltype */
2180                         const uint64x2_t oi_cksum_mask2 = {
2181                                 0x1000000000000000,
2182                                 0x1000000000000000,
2183                         };
2184
2185                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2186                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
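                        /*
                         * After the shift above the oltype nibble sits at
                         * bits 59:56, so OR-ing in bit 60 sets bit 4 of
                         * that index byte and steers oltype lookups into
                         * the second half ([16-31]) of the 32-entry
                         * vqtbl2q_u8() table, while iltype indices stay in
                         * [0-15].
                         */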
2187
2188                         /* Do the lookup */
2189                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2190                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2191
2192                         /* Pick only the relevant fields, i.e. bits 48:55 of
2193                          * iltype and bits 56:63 of oltype, and place them in
2194                          * their corresponding positions in senddesc_w1.
2195                          */
2196                         const uint8x16_t shuf_mask0 = {
2197                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2198                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2199                         };
2200
2201                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2202                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2203
2204                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2205                          * l3len, l2len, ol3len, ol2len.
2206                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2207                          * a = a + (a << 8)
2208                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2209                          * a = a + (a << 16)
2210                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2211                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2212                          */
2213                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2214                                                  vshlq_n_u32(senddesc01_w1, 8));
2215                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2216                                                  vshlq_n_u32(senddesc23_w1, 8));
2217
2218                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2219                         senddesc01_w1 = vaddq_u8(
2220                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2221                         senddesc23_w1 = vaddq_u8(
2222                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2223
2224                         /* Move ltypes to senddesc*_w1 */
2225                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2226                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2227                 }
2228
2229                 xmask01 = vdupq_n_u64(0);
2230                 xmask23 = xmask01;
2231                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2232                              : [a] "+w"(xmask01)
2233                              : [in] "r"(mbuf0)
2234                              : "memory");
2235
2236                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2237                              : [a] "+w"(xmask01)
2238                              : [in] "r"(mbuf1)
2239                              : "memory");
2240
2241                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2242                              : [b] "+w"(xmask23)
2243                              : [in] "r"(mbuf2)
2244                              : "memory");
2245
2246                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2247                              : [b] "+w"(xmask23)
2248                              : [in] "r"(mbuf3)
2249                              : "memory");
2250                 xmask01 = vshlq_n_u64(xmask01, 20);
2251                 xmask23 = vshlq_n_u64(xmask23, 20);
2252
2253                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2254                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2255
2256                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2257                         /* Tx ol_flag for vlan. */
2258                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2259                         /* Bit enable for VLAN1 */
2260                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2261                         /* Tx ol_flag for QinQ. */
2262                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2263                         /* Bit enable for VLAN0 */
2264                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2265                         /* Load VLAN values from packet. Outer is VLAN0. */
2266                         uint64x2_t ext01 = {
2267                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2268                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2269                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2270                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2271                         };
2272                         uint64x2_t ext23 = {
2273                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2274                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2275                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2276                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2277                         };
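                        /*
                         * SEND_EXT_W1 layout used above: VLAN0 (outer) TCI
                         * at bits 23:8 and VLAN1 (inner) TCI at bits 47:32,
                         * next to the default insert points (12) set
                         * earlier; bits 48/49 are the matching insert
                         * enables OR-ed in below from the generated masks.
                         */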
2278
2279                         /* Get ol_flags of the packets. */
2280                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2281                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2282
2283                         /* ORR vlan outer/inner values into cmd. */
2284                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2285                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2286
2287                         /* Test for offload enable bits and generate masks. */
2288                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2289                                                       mlv),
2290                                             vandq_u64(vtstq_u64(xtmp128, olq),
2291                                                       mlq));
2292                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2293                                                       mlv),
2294                                             vandq_u64(vtstq_u64(ytmp128, olq),
2295                                                       mlq));
2296
2297                         /* Set vlan enable bits into cmd based on mask. */
2298                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2299                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2300                 }
2301
2302                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2303                         /* Tx ol_flag for timestamp. */
2304                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2305                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
2306                         /* Set send mem alg to SUB. */
2307                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2308                         /* Increment send mem address by 8. */
2309                         const uint64x2_t addr = {0x8, 0x8};
2310
2311                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2312                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2313
2314                         /* Check if timestamp is requested and generate an
2315                          * inverted mask, since the default cmd value already
2316                          * handles the timestamp-enabled case.
2317                          */
2318                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2319                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2320
2321                         /* Change send mem address to an 8 byte offset when
2322                          * TSTMP is disabled.
2323                          */
2324                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2325                                                  vandq_u64(xtmp128, addr));
2326                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2327                                                  vandq_u64(ytmp128, addr));
2328                         /* Change send mem alg to SUB when TSTMP is disabled. */
2329                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2330                                                  vandq_u64(xtmp128, alg));
2331                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2332                                                  vandq_u64(ytmp128, alg));
2333
2334                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2335                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2336                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2337                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2338                 }
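                /*
                 * The inverted-mask trick above: vtstq_u64() yields
                 * all-ones lanes for packets requesting a timestamp, and
                 * vmvnq_u32() flips them, so only the non-timestamp lanes
                 * pick up the +8 address offset and the SUB alg, pointing
                 * their SEND_MEM op at the word after txq->ts_mem instead
                 * of the live timestamp word.
                 */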
2339
2340                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2341                         const uint64_t lso_fmt = txq->lso_tun_fmt;
2342                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2343                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2344
2345                         /* Extract SD W1 as we need to set L4 types. */
2346                         vst1q_u64(sd_w1, senddesc01_w1);
2347                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
2348
2349                         /* Extract SX W0 as we need to set LSO fields. */
2350                         vst1q_u64(sx_w0, sendext01_w0);
2351                         vst1q_u64(sx_w0 + 2, sendext23_w0);
2352
2353                         /* Extract ol_flags. */
2354                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2355                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2356
2357                         /* Prepare individual mbufs. */
2358                         cn10k_nix_prepare_tso(tx_pkts[0],
2359                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
2360                                 (union nix_send_ext_w0_u *)&sx_w0[0],
2361                                 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2362
2363                         cn10k_nix_prepare_tso(tx_pkts[1],
2364                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
2365                                 (union nix_send_ext_w0_u *)&sx_w0[1],
2366                                 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2367
2368                         cn10k_nix_prepare_tso(tx_pkts[2],
2369                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
2370                                 (union nix_send_ext_w0_u *)&sx_w0[2],
2371                                 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2372
2373                         cn10k_nix_prepare_tso(tx_pkts[3],
2374                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
2375                                 (union nix_send_ext_w0_u *)&sx_w0[3],
2376                                 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2377
2378                         senddesc01_w1 = vld1q_u64(sd_w1);
2379                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2380
2381                         sendext01_w0 = vld1q_u64(sx_w0);
2382                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
2383                 }
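                      /* The LSO fix-up depends on each packet's tunnel type, so
                       * the vectors are spilled to the sd_w1[]/sx_w0[] arrays,
                       * patched one mbuf at a time by cn10k_nix_prepare_tso()
                       * (note the per-lane vgetq_lane_u64() extraction of
                       * ol_flags above) and then reloaded into vector
                       * registers.
                       */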
2384
2385                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2386                     !(flags & NIX_TX_MULTI_SEG_F) &&
2387                     !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2388                         /* Set don't free bit if reference count > 1 */
2389                         xmask01 = vdupq_n_u64(0);
2390                         xmask23 = xmask01;
2391
2392                         /* Move mbufs to iova */
2393                         mbuf0 = (uint64_t *)tx_pkts[0];
2394                         mbuf1 = (uint64_t *)tx_pkts[1];
2395                         mbuf2 = (uint64_t *)tx_pkts[2];
2396                         mbuf3 = (uint64_t *)tx_pkts[3];
2397
2398                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2399                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2400                         else
2401                                 RTE_MEMPOOL_CHECK_COOKIES(
2402                                         ((struct rte_mbuf *)mbuf0)->pool,
2403                                         (void **)&mbuf0, 1, 0);
2404
2405                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2406                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2407                         else
2408                                 RTE_MEMPOOL_CHECK_COOKIES(
2409                                         ((struct rte_mbuf *)mbuf1)->pool,
2410                                         (void **)&mbuf1, 1, 0);
2411
2412                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2413                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2414                         else
2415                                 RTE_MEMPOOL_CHECK_COOKIES(
2416                                         ((struct rte_mbuf *)mbuf2)->pool,
2417                                         (void **)&mbuf2, 1, 0);
2418
2419                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2420                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2421                         else
2422                                 RTE_MEMPOOL_CHECK_COOKIES(
2423                                         ((struct rte_mbuf *)mbuf3)->pool,
2424                                         (void **)&mbuf3, 1, 0);
2425                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2426                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2427                 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2428                            !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2429                         /* Move mbufs to iova */
2430                         mbuf0 = (uint64_t *)tx_pkts[0];
2431                         mbuf1 = (uint64_t *)tx_pkts[1];
2432                         mbuf2 = (uint64_t *)tx_pkts[2];
2433                         mbuf3 = (uint64_t *)tx_pkts[3];
2434
2435                         /* Mark mempool object as "put" since
2436                          * it is freed by NIX
2437                          */
2438                         RTE_MEMPOOL_CHECK_COOKIES(
2439                                 ((struct rte_mbuf *)mbuf0)->pool,
2440                                 (void **)&mbuf0, 1, 0);
2441
2442                         RTE_MEMPOOL_CHECK_COOKIES(
2443                                 ((struct rte_mbuf *)mbuf1)->pool,
2444                                 (void **)&mbuf1, 1, 0);
2445
2446                         RTE_MEMPOOL_CHECK_COOKIES(
2447                                 ((struct rte_mbuf *)mbuf2)->pool,
2448                                 (void **)&mbuf2, 1, 0);
2449
2450                         RTE_MEMPOOL_CHECK_COOKIES(
2451                                 ((struct rte_mbuf *)mbuf3)->pool,
2452                                 (void **)&mbuf3, 1, 0);
2453                 }
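                      /* In the prefree branches above, 0x80000 sets bit 19 of
                       * SEND_HDR_W0, which per the NIX send header layout is
                       * the don't-free bit. Rough scalar equivalent for one
                       * mbuf m:
                       *
                       *   if (cnxk_nix_prefree_seg(m))   // still referenced
                       *           w0 |= BIT_ULL(19);     // NIX must not free m
                       *   else                           // NIX will free it
                       *           RTE_MEMPOOL_CHECK_COOKIES(m->pool,
                       *                                     (void **)&m, 1, 0);
                       */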
2454
2455                 /* Create the 4-dword cmd (send hdr + sg desc) for all 4 mbufs */
2456                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2457                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2458                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2459                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2460
2461                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2462                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2463                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2464                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2465
2466                 if (flags & NIX_TX_NEED_EXT_HDR) {
2467                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2468                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2469                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2470                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2471                 }
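                      /* The zips interleave the paired W0/W1 registers back
                       * into per-packet descriptors; e.g. for packets 0 and 1:
                       *
                       *   senddesc01_w0 = { pkt0.w0, pkt1.w0 }
                       *   senddesc01_w1 = { pkt0.w1, pkt1.w1 }
                       *   vzip1q_u64   ->  cmd0[0] = { pkt0.w0, pkt0.w1 }
                       *   vzip2q_u64   ->  cmd0[1] = { pkt1.w0, pkt1.w1 }
                       */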
2472
2473                 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2474                         const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2475                                                 RTE_MBUF_F_TX_SEC_OFFLOAD};
2476                         uintptr_t next;
2477                         uint8_t dw;
2478
2479                         /* Extract ol_flags. */
2480                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2481                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2482
2483                         xtmp128 = vtstq_u64(olf, xtmp128);
2484                         ytmp128 = vtstq_u64(olf, ytmp128);
2485
2486                         /* Process mbuf0 */
2487                         dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2488                         if (vgetq_lane_u64(xtmp128, 0))
2489                                 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2490                                                        &cmd1[0], &next, c_laddr,
2491                                                        &c_lnum, &c_loff,
2492                                                        &c_shft, sa_base, flags);
2493                         else
2494                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2495                                                    &shift, &wd.data128, &next);
2496
2497                         /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2498                         cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2499                                              cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2500                                              flags);
2501
2502                         /* Process mbuf1 */
2503                         dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2504                         if (vgetq_lane_u64(xtmp128, 1))
2505                                 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2506                                                        &cmd1[1], &next, c_laddr,
2507                                                        &c_lnum, &c_loff,
2508                                                        &c_shft, sa_base, flags);
2509                         else
2510                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2511                                                    &shift, &wd.data128, &next);
2512
2513                         /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2514                         cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2515                                              cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2516                                              flags);
2517
2518                         /* Process mbuf2 */
2519                         dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2520                         if (vgetq_lane_u64(ytmp128, 0))
2521                                 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2522                                                        &cmd1[2], &next, c_laddr,
2523                                                        &c_lnum, &c_loff,
2524                                                        &c_shft, sa_base, flags);
2525                         else
2526                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2527                                                    &shift, &wd.data128, &next);
2528
2529                         /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2530                         cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2531                                              cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2532                                              flags);
2533
2534                         /* Process mbuf3 */
2535                         dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2536                         if (vgetq_lane_u64(ytmp128, 1))
2537                                 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2538                                                        &cmd1[3], &next, c_laddr,
2539                                                        &c_lnum, &c_loff,
2540                                                        &c_shft, sa_base, flags);
2541                         else
2542                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2543                                                    &shift, &wd.data128, &next);
2544
2545                         /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2546                         cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2547                                              cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2548                                              flags);
2549
2550                 } else if (flags & NIX_TX_MULTI_SEG_F) {
2551                         uint8_t j;
2552
2553                         segdw[4] = 8;
2554                         j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2555                                                           cmd2, cmd3, segdw,
2556                                                           (uint64_t *)
2557                                                           LMT_OFF(laddr, lnum,
2558                                                                   0),
2559                                                           &wd.data128, &shift,
2560                                                           flags);
2561                         lnum += j;
2562                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2563                         /* Store the prepared send desc to LMT lines */
2564                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2565                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2566                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2567                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2568                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2569                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2570                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2571                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2572                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2573                                 lnum += 1;
2574                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2575                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2576                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2577                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2578                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2579                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2580                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2581                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2582                         } else {
2583                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2584                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2585                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2586                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2587                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2588                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2589                                 lnum += 1;
2590                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2591                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2592                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2593                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2594                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2595                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2596                         }
2597                         lnum += 1;
2598                 } else {
2599                         /* Store the prepared send desc to LMT lines */
2600                         vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2601                         vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2602                         vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2603                         vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2604                         vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2605                         vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2606                         vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2607                         vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2608                         lnum += 1;
2609                 }
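                      /* Each LMT line is 128 B. Without an ext header a packet
                       * needs 32 B (send hdr + sg desc), so all four packets
                       * fit in one line; the ext-header layouts above take
                       * 48 B or 64 B per packet and spill across two lines,
                       * hence the interim lnum increments.
                       */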
2610
2611                 if (flags & NIX_TX_MULTI_SEG_F) {
2612                         tx_pkts[0]->next = NULL;
2613                         tx_pkts[1]->next = NULL;
2614                         tx_pkts[2]->next = NULL;
2615                         tx_pkts[3]->next = NULL;
2616                 }
2617
2618                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2619         }
2620
2621         /* Round up lnum to include the last LMT line if it is partially filled */
2622         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2623                 lnum = lnum + !!loff;
2624                 wd.data128 = wd.data128 |
2625                         (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2626         }
2627
2628         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2629                 wd.data[0] >>= 16;
2630
2631         if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
2632                 ws[1] = roc_sso_hws_head_wait(ws[0]);
2633
2634         left -= burst;
2635
2636         /* Submit CPT instructions if any */
2637         if (flags & NIX_TX_OFFLOAD_SECURITY_F)
2638                 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2639                                      c_shft);
2640
2641         /* Trigger LMTST */
2642         if (lnum > 16) {
2643                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2644                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2645
2646                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2647                 wd.data[0] &= ~0x7ULL;
2648
2649                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2650                         wd.data[0] <<= 16;
2651
2652                 wd.data[0] |= (15ULL << 12);
2653                 wd.data[0] |= (uint64_t)lmt_id;
2654
2655                 /* STEOR0 */
2656                 roc_lmt_submit_steorl(wd.data[0], pa);
2657
2658                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2659                         wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2660
2661                 pa = io_addr | (wd.data[1] & 0x7) << 4;
2662                 wd.data[1] &= ~0x7ULL;
2663
2664                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2665                         wd.data[1] <<= 16;
2666
2667                 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2668                 wd.data[1] |= (uint64_t)(lmt_id + 16);
2669
2670                 /* STEOR1 */
2671                 roc_lmt_submit_steorl(wd.data[1], pa);
2672         } else if (lnum) {
2673                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2674                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2675
2676                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2677                 wd.data[0] &= ~0x7ULL;
2678
2679                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2680                         wd.data[0] <<= 16;
2681
2682                 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2683                 wd.data[0] |= lmt_id;
2684
2685                 /* STEOR0 */
2686                 roc_lmt_submit_steorl(wd.data[0], pa);
2687         }
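              /* Submission sketch: each STEOR data word encodes the burst
               * (line count - 1) at bit 12 and the LMT id in the low bits,
               * and one STEOR covers at most 16 lines. A burst of more than
               * 16 lines is therefore split into STEOR0 (a full 16 lines,
               * 15ULL << 12) and STEOR1 (the remaining lnum - 16 lines,
               * encoded as lnum - 17).
               */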
2688
2689         rte_io_wmb();
2690         if (left)
2691                 goto again;
2692
2693         if (unlikely(scalar)) {
2694                 if (flags & NIX_TX_MULTI_SEG_F)
2695                         pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
2696                                                          scalar, cmd, flags);
2697                 else
2698                         pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
2699                                                     scalar, cmd, flags);
2700         }
2701
2702         return pkts;
2703 }
2704
2705 #else
2706 static __rte_always_inline uint16_t
2707 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
2708                            struct rte_mbuf **tx_pkts, uint16_t pkts,
2709                            uint64_t *cmd, const uint16_t flags)
2710 {
2711         RTE_SET_USED(ws);
2712         RTE_SET_USED(tx_queue);
2713         RTE_SET_USED(tx_pkts);
2714         RTE_SET_USED(pkts);
2715         RTE_SET_USED(cmd);
2716         RTE_SET_USED(flags);
2717         return 0;
2718 }
2719 #endif
2720
2721 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
2722 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2723 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
2724 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
2725 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
2726 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
2727 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
2728
2729 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
2730 #define NIX_TX_FASTPATH_MODES_0_15                                             \
2731         T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                  \
2732         T(l3l4csum, 4, L3L4CSUM_F)                                             \
2733         T(ol3ol4csum, 4, OL3OL4CSUM_F)                                         \
2734         T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                   \
2735         T(vlan, 6, VLAN_F)                                                     \
2736         T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
2737         T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
2738         T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2739         T(noff, 4, NOFF_F)                                                     \
2740         T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                               \
2741         T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                           \
2742         T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2743         T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
2744         T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
2745         T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
2746         T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
2747           NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2748
2749 #define NIX_TX_FASTPATH_MODES_16_31                                            \
2750         T(tso, 6, TSO_F)                                                       \
2751         T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
2752         T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
2753         T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
2754         T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
2755         T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
2756         T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
2757         T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2758           TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2759         T(tso_noff, 6, TSO_F | NOFF_F)                                         \
2760         T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
2761         T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
2762         T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
2763           TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2764         T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
2765         T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
2766         T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2767         T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2768           TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2769
2770 #define NIX_TX_FASTPATH_MODES_32_47                                            \
2771         T(ts, 8, TSP_F)                                                        \
2772         T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
2773         T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
2774         T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2775         T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
2776         T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
2777         T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
2778         T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
2779           TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2780         T(ts_noff, 8, TSP_F | NOFF_F)                                          \
2781         T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
2782         T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
2783         T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
2784           TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2785         T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
2786         T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
2787         T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
2788         T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
2789           TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2790
2791 #define NIX_TX_FASTPATH_MODES_48_63                                            \
2792         T(ts_tso, 8, TSP_F | TSO_F)                                            \
2793         T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
2794         T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
2795         T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
2796           TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
2797         T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
2798         T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
2799         T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
2800         T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2801           TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2802         T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
2803         T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
2804         T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
2805         T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
2806           TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2807         T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
2808         T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
2809           TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
2810         T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
2811           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
2812         T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2813           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2814
2815 #define NIX_TX_FASTPATH_MODES_64_79                                            \
2816         T(sec, 4, T_SEC_F)                                                     \
2817         T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                               \
2818         T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                           \
2819         T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2820         T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
2821         T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
2822         T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
2823         T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2824           T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2825         T(sec_noff, 4, T_SEC_F | NOFF_F)                                       \
2826         T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
2827         T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
2828         T(sec_noff_ol3ol4csum_l3l4csum, 4,                                     \
2829           T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2830         T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
2831         T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
2832         T(sec_noff_vlan_ol3ol4csum, 6,                                         \
2833           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
2834         T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2835           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2836
2837 #define NIX_TX_FASTPATH_MODES_80_95                                            \
2838         T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
2839         T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
2840         T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
2841         T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
2842           T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2843         T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
2844         T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
2845         T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2846         T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
2847           T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2848         T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
2849         T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
2850         T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2851         T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
2852           T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2853         T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
2854         T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
2855           T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2856         T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
2857           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2858         T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
2859           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2860
2861 #define NIX_TX_FASTPATH_MODES_96_111                                           \
2862         T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2863         T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2864         T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2865         T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2866           T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2867         T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2868         T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2869         T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2870         T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2871           T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2872         T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2873         T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2874         T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2875         T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2876           T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2877         T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2878         T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2879           T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2880         T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2881           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2882         T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2883           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2884
2885 #define NIX_TX_FASTPATH_MODES_112_127                                          \
2886         T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2887         T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2888         T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2889         T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2890           T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2891         T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2892         T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2893           T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2894         T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2895           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2896         T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2897           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2898         T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2899         T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2900           T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2901         T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2902           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2903         T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2904           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2905         T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2906         T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2907           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2908         T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2909           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2910         T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2911           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2912                   L3L4CSUM_F)
2913
2914 #define NIX_TX_FASTPATH_MODES                                                  \
2915         NIX_TX_FASTPATH_MODES_0_15                                             \
2916         NIX_TX_FASTPATH_MODES_16_31                                            \
2917         NIX_TX_FASTPATH_MODES_32_47                                            \
2918         NIX_TX_FASTPATH_MODES_48_63                                            \
2919         NIX_TX_FASTPATH_MODES_64_79                                            \
2920         NIX_TX_FASTPATH_MODES_80_95                                            \
2921         NIX_TX_FASTPATH_MODES_96_111                                           \
2922         NIX_TX_FASTPATH_MODES_112_127
2923
2924 #define T(name, sz, flags)                                                     \
2925         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
2926                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2927         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
2928                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2929         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
2930                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2931         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
2932                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2933
2934 NIX_TX_FASTPATH_MODES
2935 #undef T
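/* As an illustration, T(vlan, 6, VLAN_F) above declares:
 *
 *   uint16_t cn10k_nix_xmit_pkts_vlan(void *tx_queue,
 *                                     struct rte_mbuf **tx_pkts,
 *                                     uint16_t pkts);
 *
 * along with the matching _mseg_, _vec_ and _vec_mseg_ variants.
 */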
2936
2937 #define NIX_TX_XMIT(fn, sz, flags)                                             \
2938         uint16_t __rte_noinline __rte_hot fn(                                  \
2939                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2940         {                                                                      \
2941                 uint64_t cmd[sz];                                              \
2942                 /* TSO requires inner L3/L4 checksum offload */                 \
2943                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2944                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2945                         return 0;                                              \
2946                 return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
2947                                            flags);                             \
2948         }
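/* A source file is expected to instantiate these helpers, e.g.
 * (sketch, not a literal excerpt):
 *
 *   NIX_TX_XMIT(cn10k_nix_xmit_pkts_vlan, 6, VLAN_F)
 *
 * which defines cn10k_nix_xmit_pkts_vlan() with a 6-dword cmd[]
 * scratch area and VLAN_F fixed at compile time, letting the
 * compiler prune the offload paths that are not selected.
 */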
2949
2950 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
2951         uint16_t __rte_noinline __rte_hot fn(                                  \
2952                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2953         {                                                                      \
2954                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2955                 /* TSO requires inner L3/L4 checksum offload */                 \
2956                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2957                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2958                         return 0;                                              \
2959                 return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
2960                                                 cmd,                           \
2961                                                 flags | NIX_TX_MULTI_SEG_F);   \
2962         }
2963
2964 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
2965         uint16_t __rte_noinline __rte_hot fn(                                  \
2966                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2967         {                                                                      \
2968                 uint64_t cmd[sz];                                              \
2969                 /* TSO requires inner L3/L4 checksum offload */                 \
2970                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2971                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2972                         return 0;                                              \
2973                 return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
2974                                                   pkts, cmd, (flags));         \
2975         }
2976
2977 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
2978         uint16_t __rte_noinline __rte_hot fn(                                  \
2979                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2980         {                                                                      \
2981                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2982                 /* TSO requires inner L3/L4 checksum offload */                 \
2983                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2984                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2985                         return 0;                                              \
2986                 return cn10k_nix_xmit_pkts_vector(                             \
2987                         tx_queue, NULL, tx_pkts, pkts, cmd,                    \
2988                         (flags) | NIX_TX_MULTI_SEG_F);                         \
2989         }
2990
2991 #endif /* __CN10K_TX_H__ */