/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flags to control the xmit_prepare function.
 * Defined from the backward end (high bits) to denote that they are
 * not used as offload flags to pick the Tx function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update the fc_cache_pkts */            \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqe_per_sqb to express in pkts */     \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
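
/* Illustrative flow-control math (values are hypothetical): with
 * nb_sqb_bufs_adj = 512, *fc_mem = 500 and sqes_per_sqb_log2 = 5, the
 * refreshed cache is (512 - 500) << 5 = 384, i.e. roughly 384 packets'
 * worth of SQE room before the check fails.
 */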

/* Encoded number of segments to number of dwords macro: each value of
 * nb_segs is encoded as 4 bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
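
/* Worked decode (each 4-bit nibble x of the magic holds the dword count
 * for x segments):
 *   NIX_NB_SEGS_TO_SEGDW(1) = (0x76654432210ULL >> 4) & 0xF = 1
 *   NIX_NB_SEGS_TO_SEGDW(3) = (0x76654432210ULL >> 12) & 0xF = 2
 *   NIX_NB_SEGS_TO_SEGDW(4) = (0x76654432210ULL >> 16) & 0xF = 3
 */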

/* Function to determine the number of Tx subdescriptors required when the
 * extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
		       2 :
		       ((flags &
			 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
				1 :
				0);
}
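
/* Example: TSTAMP needs both a SEND_EXT and a SEND_MEM subdescriptor,
 * hence cn10k_nix_tx_ext_subs(NIX_TX_OFFLOAD_TSTAMP_F) == 2, while
 * VLAN/QinQ or TSO alone need only the SEND_EXT one and yield 1.
 */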

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
	if (!(flags & NIX_TX_MULTI_SEG_F))
		return cn10k_nix_tx_ext_subs(flags) + 2;

	/* Everything is already accounted for in segdw */
	return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ?
		       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
		       8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
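
/* Layout note (illustrative): bits [2:0] carry size-1 of the first LMT
 * line and are later folded into the I/O address by the caller, while
 * the 3-bit fields at bits 19, 22, ..., 61 carry size-1 for LMT lines
 * 1..15 of a 16-line burst.
 */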

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ?
			(flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
			4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1;
	/* 15 vector sizes for single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
	/* We have two CPT instructions per LMTLine */
	const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
	uint64_t data;

	/* This will be moved to addr area */
	data = dw_m1 << 16;
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags, const uint16_t static_sz)
{
	if (static_sz)
		cmd[0] = txq->send_hdr_w0;
	else
		cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
			 ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
	cmd[1] = 0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
			cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
		else
			cmd[2] = NIX_SUBDC_EXT << 60;
		cmd[3] = 0;
		cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	} else {
		cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	}
}
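
/* Resulting skeleton layout when an extended header is needed
 * (illustrative; field positions per the NIX send descriptor format):
 *   cmd[0..1] - send header, sizem1 recomputed when static_sz == 0
 *   cmd[2..3] - NIX_SUBDC_EXT subdesc, BIT(15) requesting a timestamp
 *   cmd[4..5] - NIX_SUBDC_SG subdesc, BIT(48) presetting segs to 1
 * Without NIX_TX_NEED_EXT_HDR the SG subdesc sits at cmd[2..3] instead.
 */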

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
		     uint8_t loff, uint8_t shft)
{
	uint64_t data;
	uintptr_t pa;

	/* Check if there is any CPT instruction to submit */
	if (!lnum && !loff)
		return;

	data = cn10k_cpt_tx_steor_data();
	/* Update lmtline usage for a partial end line */
	if (loff) {
		data &= ~(0x7ULL << shft);
		/* Update it to half full, i.e. 64B */
		data |= (0x3UL << shft);
	}

	pa = io_addr | ((data >> 16) & 0x7) << 4;
	data &= ~(0x7ULL << 16);
	/* Update lines - 1 that contain valid data */
	data |= ((uint64_t)(lnum + loff - 1)) << 12;
	data |= lmt_id;

	/* STEOR */
	roc_lmt_submit_steorl(data, pa);
}
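
/* Note (illustrative): a partially filled last line carries a single
 * 64B CPT instruction, i.e. four 16B dwords, so its 3-bit size field
 * is rewritten to 0x3 (dwords - 1) before the STEORL is issued.
 */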

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
		       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
		       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
		       const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	uint8_t l3l4type, chksum;
	uint64x2_t cmd01, cmd23;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint8_t l2_len;
	uint16_t tag;
	uint64_t sa;

	sess_priv.u64 = *rte_security_dynfield(m);

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		l2_len = vgetq_lane_u8(*cmd0, 8);
		/* Extract l3l4type either from il3il4type or ol3ol4type */
		if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)
			l3l4type = vgetq_lane_u8(*cmd0, 13);
		else
			l3l4type = vgetq_lane_u8(*cmd0, 12);

		chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
		chksum = ~chksum;
		sess_priv.chksum = sess_priv.chksum & chksum;
		/* Clear SEND header flags */
		*cmd0 = vsetq_lane_u16(0, *cmd0, 6);
	} else {
		l2_len = m->l2_len;
	}

	/* Retrieve DPTR */
	dptr = vgetq_lane_u64(*cmd1, 1);
	pkt_len = vgetq_lane_u16(*cmd0, 0);

	/* Calculate dlen adj */
	dlen_adj = pkt_len - l2_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptors. Security is single segment only */
	*cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
	*cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

	/* Get a 128B-aligned area past the packet where the NIX
	 * descriptor is stored.
	 */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed is excluding L2 HDR */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
			((uint64_t)sess_priv.chksum) << 32 | pkt_len);

	/* CPT Word 0 and Word 1 */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* CPT_RES_S is 16B above NIXTX */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3 */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}

	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Move to our line */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to lmt line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Move to next line for every other CPT inst */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	struct nix_send_hdr_s *send_hdr;
	uint8_t l3l4type, chksum;
	uint64x2_t cmd01, cmd23;
	union nix_send_sg_s *sg;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint8_t l2_len;
	uint16_t tag;
	uint64_t sa;

	sess_priv.u64 = *rte_security_dynfield(m);
	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR)
		sg = (union nix_send_sg_s *)&cmd[4];
	else
		sg = (union nix_send_sg_s *)&cmd[2];

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		l2_len = cmd[1] & 0xFF;
		/* Extract l3l4type either from il3il4type or ol3ol4type */
		if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)
			l3l4type = (cmd[1] >> 40) & 0xFF;
		else
			l3l4type = (cmd[1] >> 32) & 0xFF;

		chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
		chksum = ~chksum;
		sess_priv.chksum = sess_priv.chksum & chksum;
		/* Clear SEND header flags */
		cmd[1] &= ~(0xFFFFUL << 32);
	} else {
		l2_len = m->l2_len;
	}

	/* Retrieve DPTR */
	dptr = *(uint64_t *)(sg + 1);
	pkt_len = send_hdr->w0.total;

	/* Calculate dlen adj */
	dlen_adj = pkt_len - l2_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptors. Security is single segment only */
	send_hdr->w0.total = pkt_len + dlen_adj;
	sg->seg1_size = pkt_len + dlen_adj;

	/* Get a 128B-aligned area past the packet where the NIX
	 * descriptor is stored.
	 */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed is excluding L2 HDR */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
			((uint64_t)sess_priv.chksum) << 32 | pkt_len);

	/* CPT Word 0 and Word 1. Assume no multi-seg support */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* CPT_RES_S is 16B above NIXTX */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3 */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}
	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Move to our line */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to lmt line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Move to next line for every other CPT inst */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	RTE_SET_USED(m);
	RTE_SET_USED(cmd);
	RTE_SET_USED(nixtx_addr);
	RTE_SET_USED(lbase);
	RTE_SET_USED(lnum);
	RTE_SET_USED(loff);
	RTE_SET_USED(shft);
	RTE_SET_USED(sa_base);
	RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       RTE_MBUF_F_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
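
/* Worked example (hypothetical plain TCPv4 packet): with l2_len = 14,
 * l3_len = 20, l4_len = 20 and no outer flags, lso_sb = 54, so
 * paylen = pkt_len - 54 is subtracted from the IPv4 total-length field
 * at mdata + 14 + 2, leaving only the per-segment header lengths.
 */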

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
		       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
		       uint64_t mark_fmt)
{
	uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;
	uint16_t mark_form = 0;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w0.mark_en = 0;
		send_hdr_ext->w1.u = 0;
		ol_flags = m->ol_flags;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F))
		send_hdr->w0.total = m->data_len;
	else
		send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

		/* In case of no tunnel header, shift the IL3/IL4 fields
		 * into the OL3/OL4 positions so that OL3/OL4 are used
		 * for the header checksum.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
						  RTE_MBUF_F_TX_IPV6));

		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
		/* Fill for VLAN marking only when VLAN insertion enabled */
		mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
			     (send_hdr_ext->w1.vlan1_ins_ena ||
			      send_hdr_ext->w1.vlan0_ins_ena));

		/* Mask requested flags with packet data information */
		mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
		mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

		mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
		mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
		markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

		send_hdr_ext->w0.mark_en = !!mark_off;
		send_hdr_ext->w0.markform = mark_form & 0x7F;
		send_hdr_ext->w0.markptr = markptr;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = send_hdr->w0.total;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent mbuf
			 *		is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	} else {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		/* NOFF is handled later for multi-seg */
	}

	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		*sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
			   const uint16_t flags)
{
	struct nix_send_ext_s *send_hdr_ext;
	union nix_send_sg_s *sg;

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be on the stack. The intent is
	 * that 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;

		sg = (union nix_send_sg_s *)(cmd + 4);
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp =
			!(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		/* For packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not
		 * set, the Tx tstamp should not be recorded; hence change the
		 * alg type to NIX_SENDMEMALG_SUB and move the send mem addr
		 * field to the next 8 bytes so that the actual registered Tx
		 * tstamp address is not corrupted.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg =
			NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
	}
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];

	/* Start from second segment, first segment is already there */
	i = 1;
	sg_u = sg->u;
	nb_segs = m->nb_segs - 1;
	m_next = m->next;
	slist = &cmd[3 + off + 1];

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif
	m = m_next;
	if (!m)
		goto done;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

done:
	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Round up extra dwords to a multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
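
/* Worked example (hypothetical): for a 4-segment chain with no extended
 * header, segments 1-3 share the first SG subdesc and segment 4 opens a
 * second one, so slist advances 6 u64 words past &cmd[2]. That rounds up
 * to 3 16B dwords, plus 1 default for the send header: segdw = 4 and
 * sizem1 = 3.
 */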

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
		    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uint8_t lnum, c_lnum, c_shft, c_loff;
	uintptr_t pa, lbase = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uintptr_t c_lbase = lbase;
	uint64_t lso_tun_fmt = 0;
	uint64_t mark_fmt = 0;
	uint8_t mark_flag = 0;
	rte_iova_t c_io_addr;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	uint64_t data;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}
	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		mark_fmt = txq->mark_fmt;
		mark_flag = txq->mark_flag;
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	burst = left > 32 ? 32 : left;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec, mark_flag, mark_fmt);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction and get nixtx addr */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX desc to LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
					      4, flags);
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
			lnum++;
	}

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent to CPT */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}
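
/* Minimal usage sketch (illustrative, not part of the driver): the real
 * entry points are generated per offload-flag combination elsewhere; a
 * directly instantiated variant with only inner checksum offload would
 * look roughly like this. The wrapper name is hypothetical.
 */
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_csum_example(void *tx_queue, struct rte_mbuf **tx_pkts,
				 uint16_t pkts)
{
	/* Scratch for the command skeleton; 8 dwords covers the send
	 * header plus optional ext header and SG for the single-seg path.
	 */
	uint64_t cmd[8];

	/* ws is consulted only under NIX_TX_VWQE_F, hence NULL here */
	return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd,
				   NIX_TX_OFFLOAD_L3_L4_CSUM_F);
}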

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
			 struct rte_mbuf **tx_pkts, uint16_t pkts,
			 uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lbase = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint8_t lnum, c_lnum, c_loff;
	uintptr_t c_lbase = lbase;
	uint64_t lso_tun_fmt = 0;
	uint64_t mark_fmt = 0;
	uint8_t mark_flag = 0;
	uint64_t data0, data1;
	rte_iova_t c_io_addr;
	uint8_t shft, c_shft;
	__uint128_t data128;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}
	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		mark_fmt = txq->mark_fmt;
		mark_flag = txq->mark_flag;
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec, mark_flag, mark_fmt);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction and get nixtx addr */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX desc to LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
					      segdw, flags);
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
			lnum++;
			data128 |= (((__uint128_t)(segdw - 1)) << shft);
			shft += 3;
		}
	}

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent to CPT */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;
	/* Trigger LMTST */
	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}
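
/* Note (illustrative): unlike the single-seg path, which uses the fixed
 * pattern from cn10k_nix_tx_steor_data(), each packet here contributes
 * its own (segdw - 1) 3-bit field to data128, and the two STEORL data
 * words are carved out of that running 128-bit accumulator.
 */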

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		      const uint64_t flags, const uint64_t lso_tun_fmt)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
			0x1;
		uint8_t shift = is_udp_tun ? 32 : 0;

		shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
		shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format = (lso_tun_fmt >> shift);
	}
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
				union nix_send_hdr_w0_u *sh,
				union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif

	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			   uint64x2_t *cmd1, const uint8_t segdw,
			   const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;

	if (m->nb_segs == 1) {
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		return;
	}

	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	sh.sizem1 = segdw - 1;
	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}
1310
1311 #define NIX_DESCS_PER_LOOP 4
1312
1313 static __rte_always_inline uint8_t
1314 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1315                                uint64x2_t *cmd1, uint64x2_t *cmd2,
1316                                uint64x2_t *cmd3, uint8_t *segdw,
1317                                uint64_t *lmt_addr, __uint128_t *data128,
1318                                uint8_t *shift, const uint16_t flags)
1319 {
1320         uint8_t j, off, lmt_used;
1321
1322         if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1323             !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1324                 /* No segments in 4 consecutive packets. */
1325                 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1326                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1327                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1328                                                            &cmd0[j], &cmd1[j],
1329                                                            segdw[j], flags);
1330                         vst1q_u64(lmt_addr, cmd0[0]);
1331                         vst1q_u64(lmt_addr + 2, cmd1[0]);
1332                         vst1q_u64(lmt_addr + 4, cmd0[1]);
1333                         vst1q_u64(lmt_addr + 6, cmd1[1]);
1334                         vst1q_u64(lmt_addr + 8, cmd0[2]);
1335                         vst1q_u64(lmt_addr + 10, cmd1[2]);
1336                         vst1q_u64(lmt_addr + 12, cmd0[3]);
1337                         vst1q_u64(lmt_addr + 14, cmd1[3]);
1338
1339                         *data128 |= ((__uint128_t)7) << *shift;
1340                         *shift += 3;
1341
1342                         return 1;
1343                 }
1344         }
1345
1346         lmt_used = 0;
1347         for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1348                 /* Fit consecutive packets in same LMTLINE. */
1349                 if ((segdw[j] + segdw[j + 1]) <= 8) {
1350                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1351                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1352                                                            &cmd0[j], &cmd1[j],
1353                                                            segdw[j], flags);
1354                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1355                                                            &cmd0[j + 1],
1356                                                            &cmd1[j + 1],
1357                                                            segdw[j + 1], flags);
1358                                 /* TSTAMP takes 4 each, no segs. */
1359                                 vst1q_u64(lmt_addr, cmd0[j]);
1360                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1361                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1362                                 vst1q_u64(lmt_addr + 6, cmd3[j]);
1363
1364                                 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1365                                 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1366                                 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1367                                 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1368                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1369                                 /* EXT header takes 3 dwords each, leaving space for 2 segs. */
1370                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1371                                                            lmt_addr + 6,
1372                                                            &cmd0[j], &cmd1[j],
1373                                                            segdw[j], flags);
1374                                 vst1q_u64(lmt_addr, cmd0[j]);
1375                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1376                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1377                                 off = segdw[j] - 3;
1378                                 off <<= 1;
1379                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1380                                                            lmt_addr + 12 + off,
1381                                                            &cmd0[j + 1],
1382                                                            &cmd1[j + 1],
1383                                                            segdw[j + 1], flags);
1384                                 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1385                                 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1386                                 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1387                         } else {
1388                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1389                                                            lmt_addr + 4,
1390                                                            &cmd0[j], &cmd1[j],
1391                                                            segdw[j], flags);
1392                                 vst1q_u64(lmt_addr, cmd0[j]);
1393                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1394                                 off = segdw[j] - 2;
1395                                 off <<= 1;
1396                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1397                                                            lmt_addr + 8 + off,
1398                                                            &cmd0[j + 1],
1399                                                            &cmd1[j + 1],
1400                                                            segdw[j + 1], flags);
1401                                 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1402                                 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1403                         }
1404                         *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1405                                     << *shift;
1406                         *shift += 3;
1407                         j += 2;
1408                 } else {
1409                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
1410                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1411                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1412                                                            lmt_addr + 6,
1413                                                            &cmd0[j], &cmd1[j],
1414                                                            segdw[j], flags);
1415                                 vst1q_u64(lmt_addr, cmd0[j]);
1416                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1417                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1418                                 off = segdw[j] - 4;
1419                                 off <<= 1;
1420                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1421                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1422                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1423                                                            lmt_addr + 6,
1424                                                            &cmd0[j], &cmd1[j],
1425                                                            segdw[j], flags);
1426                                 vst1q_u64(lmt_addr, cmd0[j]);
1427                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1428                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1429                         } else {
1430                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1431                                                            lmt_addr + 4,
1432                                                            &cmd0[j], &cmd1[j],
1433                                                            segdw[j], flags);
1434                                 vst1q_u64(lmt_addr, cmd0[j]);
1435                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1436                         }
1437                         *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1438                         *shift += 3;
1439                         j++;
1440                 }
1441                 lmt_used++;
1442                 lmt_addr += 16;
1443         }
1444
1445         return lmt_used;
1446 }
1447
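/* Reserve dw 16 B dwords in the current LMT line, rolling over to a fresh
 * line when they would not fit. A worked example with assumed state
 * *loff = 112, dw = 2: 112 + 32 > 128, so the finished line's size field
 * ((112 >> 4) - 1 = 6) is merged into *data128, *lnum advances, and *next
 * points at offset 0 of the new line.
 */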
1448 static __rte_always_inline void
1449 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1450                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1451 {
1452         /* Go to next line if we are out of space */
1453         if ((*loff + (dw << 4)) > 128) {
1454                 *data128 = *data128 |
1455                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1456                 *shift = *shift + 3;
1457                 *loff = 0;
1458                 *lnum = *lnum + 1;
1459         }
1460
1461         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1462         *loff = *loff + (dw << 4);
1463 }
1464
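/* Store one packet's prepared command words to its LMT slot in descriptor
 * order: SEND_HDR (cmd0), SEND_EXT (cmd2) when present, SG (cmd1) plus any
 * extra segments, and SEND_MEM (cmd3) last for the timestamp case.
 */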
1465 static __rte_always_inline void
1466 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1467                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1468                      uint64x2_t cmd3, const uint16_t flags)
1469 {
1470         uint8_t off;
1471
1472         /* Handle no-fast-free when security is enabled without multi-seg */
1473         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1474             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1475             !(flags & NIX_TX_MULTI_SEG_F)) {
1476                 union nix_send_sg_s sg;
1477
1478                 sg.u = vgetq_lane_u64(cmd1, 0);
1479                 sg.u |= ((uint64_t)cnxk_nix_prefree_seg(mbuf) << 55);
1480                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1481
1482 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1483                 sg.u = vgetq_lane_u64(cmd1, 0);
1484                 if (!(sg.u & (1ULL << 55)))
1485                         RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1486                                                 0);
1487                 rte_io_wmb();
1488 #endif
1489         }
1490         if (flags & NIX_TX_MULTI_SEG_F) {
1491                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1492                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1493                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1494                                                    &cmd0, &cmd1, segdw, flags);
1495                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1496                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1497                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1498                         off = segdw - 4;
1499                         off <<= 4;
1500                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1501                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1502                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1503                                                    &cmd0, &cmd1, segdw, flags);
1504                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1505                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1506                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1507                 } else {
1508                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1509                                                    &cmd0, &cmd1, segdw, flags);
1510                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1511                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1512                 }
1513         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1514                 /* Store the prepared send desc to LMT lines */
1515                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1516                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1517                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1518                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1519                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1520                 } else {
1521                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1522                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1523                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1524                 }
1525         } else {
1526                 /* Store the prepared send desc to LMT lines */
1527                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1528                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1529         }
1530 }
1531
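/* Vector transmit: SEND descriptors for NIX_DESCS_PER_LOOP (4) packets are
 * built per iteration entirely in NEON registers and then flushed to LMT
 * lines. Packets beyond the last full group of four are split off up front
 * as 'scalar' for non-vector handling.
 */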
1532 static __rte_always_inline uint16_t
1533 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1534                            struct rte_mbuf **tx_pkts, uint16_t pkts,
1535                            uint64_t *cmd, const uint16_t flags)
1536 {
1537         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1538         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1539         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1540                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1541         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1542         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1543         uint64x2_t senddesc01_w0, senddesc23_w0;
1544         uint64x2_t senddesc01_w1, senddesc23_w1;
1545         uint64x2_t sendext01_w0, sendext23_w0;
1546         uint64x2_t sendext01_w1, sendext23_w1;
1547         uint64x2_t sendmem01_w0, sendmem23_w0;
1548         uint64x2_t sendmem01_w1, sendmem23_w1;
1549         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1550         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1551         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1552         struct cn10k_eth_txq *txq = tx_queue;
1553         rte_iova_t io_addr = txq->io_addr;
1554         uintptr_t laddr = txq->lmt_base;
1555         uint8_t c_lnum, c_shft, c_loff;
1556         uint64x2_t ltypes01, ltypes23;
1557         uint64x2_t xtmp128, ytmp128;
1558         uint64x2_t xmask01, xmask23;
1559         uintptr_t c_laddr = laddr;
1560         uint8_t lnum, shift, loff;
1561         rte_iova_t c_io_addr;
1562         uint64_t sa_base;
1563         union wdata {
1564                 __uint128_t data128;
1565                 uint64_t data[2];
1566         } wd;
1567
1568         if (!(flags & NIX_TX_VWQE_F)) {
1569                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1570                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1571                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1572                 /* Reduce the cached count */
1573                 txq->fc_cache_pkts -= pkts;
1574         } else {
1575                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1576                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1577         }
1578
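        /* E.g. pkts = 13 gives scalar = 13 & 3 = 1 and pkts floored to 12, so
         * the main loop below always consumes full groups of four packets.
         */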
1579         /* Perform header writes before barrier for TSO */
1580         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1581                 for (i = 0; i < pkts; i++)
1582                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1583         }
1584
1585         if (!(flags & NIX_TX_VWQE_F)) {
1586                 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1587         } else {
1588                 uint64_t w0 =
1589                         (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1590                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1591
1592                 senddesc01_w0 = vdupq_n_u64(w0);
1593         }
1594         senddesc23_w0 = senddesc01_w0;
1595
1596         senddesc01_w1 = vdupq_n_u64(0);
1597         senddesc23_w1 = senddesc01_w1;
1598         sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1599         sgdesc23_w0 = sgdesc01_w0;
1600
1601         if (flags & NIX_TX_NEED_EXT_HDR) {
1602                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1603                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1604                                                    BIT_ULL(15));
1605                         sendmem01_w0 =
1606                                 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1607                                             (NIX_SENDMEMALG_SETTSTMP << 56));
1608                         sendmem23_w0 = sendmem01_w0;
1609                         sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1610                         sendmem23_w1 = sendmem01_w1;
1611                 } else {
1612                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1613                 }
1614                 sendext23_w0 = sendext01_w0;
1615
1616                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1617                         sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1618                 else
1619                         sendext01_w1 = vdupq_n_u64(0);
1620                 sendext23_w1 = sendext01_w1;
1621         }
1622
1623         /* Get LMT base address and LMT ID as lcore id */
1624         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1625         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1626                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1627                 c_io_addr = txq->cpt_io_addr;
1628                 sa_base = txq->sa_base;
1629         }
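        /* With inline IPsec, two independent LMT regions are tracked: (laddr,
         * lmt_id, io_addr) for plain NIX submission and (c_laddr, c_lmt_id,
         * c_io_addr) for packets that must first go through CPT.
         */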
1630
1631         left = pkts;
1632 again:
1633         /* Number of packets to prepare depends on offloads enabled. */
1634         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1635                               cn10k_nix_pkts_per_vec_brst(flags) :
1636                               left;
1637         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1638                 wd.data128 = 0;
1639                 shift = 16;
1640         }
1641         lnum = 0;
1642         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1643                 loff = 0;
1644                 c_loff = 0;
1645                 c_lnum = 0;
1646                 c_shft = 16;
1647         }
1648
1649         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1650                 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1651                         burst = i;
1652                         break;
1653                 }
1654
1655                 if (flags & NIX_TX_MULTI_SEG_F) {
1656                         uint8_t j;
1657
1658                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1659                                 struct rte_mbuf *m = tx_pkts[j];
1660
1661                                 /* Get dwords based on nb_segs. */
1662                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1663                                 /* Add dwords based on offloads. */
1664                                 segdw[j] += 1 + /* SEND HDR */
1665                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1666                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1667                         }
1668
1669                         /* Check if there are enough LMTLINES for this loop */
1670                         if (lnum + 4 > 32) {
1671                                 uint8_t ldwords_con = 0, lneeded = 0;
1672                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1673                                         ldwords_con += segdw[j];
1674                                         if (ldwords_con > 8) {
1675                                                 lneeded += 1;
1676                                                 ldwords_con = segdw[j];
1677                                         }
1678                                 }
1679                                 lneeded += 1;
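                                /* E.g. segdw = {5, 5, 5, 5} packs one packet
                                 * per line (5 + 5 > 8), giving lneeded = 4.
                                 */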
1680                                 if (lnum + lneeded > 32) {
1681                                         burst = i;
1682                                         break;
1683                                 }
1684                         }
1685                 }
1686                 /* Clear the lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
1687                 senddesc01_w0 =
1688                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1689                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1690
1691                 senddesc23_w0 = senddesc01_w0;
1692                 sgdesc23_w0 = sgdesc01_w0;
1693
1694                 /* Clear vlan enables. */
1695                 if (flags & NIX_TX_NEED_EXT_HDR) {
1696                         sendext01_w1 = vbicq_u64(sendext01_w1,
1697                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1698                         sendext23_w1 = sendext01_w1;
1699                 }
1700
1701                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1702                         /* Reset send mem alg to SETTSTMP from SUB */
1703                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1704                                                  vdupq_n_u64(BIT_ULL(59)));
1705                         /* Reset send mem address to default. */
1706                         sendmem01_w1 =
1707                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1708                         sendmem23_w0 = sendmem01_w0;
1709                         sendmem23_w1 = sendmem01_w1;
1710                 }
1711
1712                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1713                         /* Clear the LSO enable bit. */
1714                         sendext01_w0 = vbicq_u64(sendext01_w0,
1715                                                  vdupq_n_u64(BIT_ULL(14)));
1716                         sendext23_w0 = sendext01_w0;
1717                 }
1718
1719                 /* Move mbuf pointers to the buf_iova field */
1720                 mbuf0 = (uint64_t *)tx_pkts[0];
1721                 mbuf1 = (uint64_t *)tx_pkts[1];
1722                 mbuf2 = (uint64_t *)tx_pkts[2];
1723                 mbuf3 = (uint64_t *)tx_pkts[3];
1724
1725                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1726                                      offsetof(struct rte_mbuf, buf_iova));
1727                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1728                                      offsetof(struct rte_mbuf, buf_iova));
1729                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1730                                      offsetof(struct rte_mbuf, buf_iova));
1731                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1732                                      offsetof(struct rte_mbuf, buf_iova));
1733                 /*
1734                  * Get each mbuf's ol_flags, iova, pktlen and dataoff:
1735                  * dataoff_iovaX.D[0] = iova,
1736                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1737                  * len_olflagsX.D[0] = ol_flags,
1738                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1739                  */
1740                 dataoff_iova0 = vld1q_u64(mbuf0);
1741                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1742                 dataoff_iova1 = vld1q_u64(mbuf1);
1743                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1744                 dataoff_iova2 = vld1q_u64(mbuf2);
1745                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1746                 dataoff_iova3 = vld1q_u64(mbuf3);
1747                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1748
1749                 /* Move mbuf pointers to the pool field */
1750                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1751                                      offsetof(struct rte_mbuf, pool) -
1752                                      offsetof(struct rte_mbuf, buf_iova));
1753                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1754                                      offsetof(struct rte_mbuf, pool) -
1755                                      offsetof(struct rte_mbuf, buf_iova));
1756                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1757                                      offsetof(struct rte_mbuf, pool) -
1758                                      offsetof(struct rte_mbuf, buf_iova));
1759                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1760                                      offsetof(struct rte_mbuf, pool) -
1761                                      offsetof(struct rte_mbuf, buf_iova));
1762
1763                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1764                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1765                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1766                         /*
1767                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1768                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1769                          */
1770
1771                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1772                                      : [a] "+w"(senddesc01_w1)
1773                                      : [in] "r"(mbuf0 + 2)
1774                                      : "memory");
1775
1776                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1777                                      : [a] "+w"(senddesc01_w1)
1778                                      : [in] "r"(mbuf1 + 2)
1779                                      : "memory");
1780
1781                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1782                                      : [b] "+w"(senddesc23_w1)
1783                                      : [in] "r"(mbuf2 + 2)
1784                                      : "memory");
1785
1786                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1787                                      : [b] "+w"(senddesc23_w1)
1788                                      : [in] "r"(mbuf3 + 2)
1789                                      : "memory");
1790
1791                         /* Get pool pointer alone */
1792                         mbuf0 = (uint64_t *)*mbuf0;
1793                         mbuf1 = (uint64_t *)*mbuf1;
1794                         mbuf2 = (uint64_t *)*mbuf2;
1795                         mbuf3 = (uint64_t *)*mbuf3;
1796                 } else {
1797                         /* Get pool pointer alone */
1798                         mbuf0 = (uint64_t *)*mbuf0;
1799                         mbuf1 = (uint64_t *)*mbuf1;
1800                         mbuf2 = (uint64_t *)*mbuf2;
1801                         mbuf3 = (uint64_t *)*mbuf3;
1802                 }
1803
1804                 const uint8x16_t shuf_mask2 = {
1805                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1806                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1807                 };
1808                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1809                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1810
1811                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1812                 const uint64x2_t and_mask0 = {
1813                         0xFFFFFFFFFFFFFFFF,
1814                         0x000000000000FFFF,
1815                 };
1816
1817                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1818                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1819                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1820                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1821
1822                 /*
1823                  * Pick only 16 bits of pktlen, present at bits 63:32,
1824                  * and place them at bits 15:0.
1825                  */
1826                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1827                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1828
1829                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1830                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1831                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1832
1833                 /* OR both sgdesc_w0 and senddesc_w0 with the 16-bit
1834                  * pktlen at bit position 15:0.
1835                  */
1836                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1837                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1838                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1839                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1840
1841                 /* Move mbuf to point to pool_id. */
1842                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1843                                      offsetof(struct rte_mempool, pool_id));
1844                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1845                                      offsetof(struct rte_mempool, pool_id));
1846                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1847                                      offsetof(struct rte_mempool, pool_id));
1848                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1849                                      offsetof(struct rte_mempool, pool_id));
1850
1851                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1852                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1853                         /*
1854                          * Lookup table to translate ol_flags to
1855                          * il3/il4 types. The result still goes into the ol3/ol4
1856                          * type fields of senddesc_w1 as only one header level is processed.
1857                          */
1858                         const uint8x16_t tbl = {
1859                                 /* [0-15] = il4type:il3type */
1860                                 0x04, /* none (IPv6 assumed) */
1861                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1862                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1863                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1864                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1865                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1866                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1867                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1868                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1869                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1870                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1871                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1872                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1873                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1874                                        * RTE_MBUF_F_TX_TCP_CKSUM
1875                                        */
1876                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1877                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1878                                        */
1879                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1880                                        * RTE_MBUF_F_TX_UDP_CKSUM
1881                                        */
1882                         };
1883
1884                         /* Extract olflags to translate to iltypes */
1885                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1886                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1887
1888                         /*
1889                          * E(47):L3_LEN(9):L2_LEN(7+z)
1890                          * E(47):L3_LEN(9):L2_LEN(7+z)
1891                          */
1892                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1893                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1894
1895                         /* Move ol_flags bits 55:52 to 51:48 with zeros
1896                          * prepended within the byte; the remaining bits are
1897                          * don't-care
1898                          */
1899                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1900                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1901                         /*
1902                          * E(48):L3_LEN(8):L2_LEN(z+7)
1903                          * E(48):L3_LEN(8):L2_LEN(z+7)
1904                          */
1905                         const int8x16_t tshft3 = {
1906                                 -1, 0, 8, 8, 8, 8, 8, 8,
1907                                 -1, 0, 8, 8, 8, 8, 8, 8,
1908                         };
1909
1910                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1911                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1912
1913                         /* Do the lookup */
1914                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1915                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1916
1917                         /* Pick only the relevant fields, i.e. bits 48:55 of iltype,
1918                          * and place them in ol3/ol4type of senddesc_w1
1919                          */
1920                         const uint8x16_t shuf_mask0 = {
1921                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1922                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1923                         };
1924
1925                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1926                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1927
1928                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1929                          * a [E(32):E(16):OL3(8):OL2(8)]
1930                          * a = a + (a << 8)
1931                          * a [E(32):E(16):(OL3+OL2):OL2]
1932                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1933                          */
1934                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1935                                                  vshlq_n_u16(senddesc01_w1, 8));
1936                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1937                                                  vshlq_n_u16(senddesc23_w1, 8));
1938
1939                         /* Move ltypes to senddesc*_w1 */
1940                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1941                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
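                        /* E.g. RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM
                         * forms nibble 0b1001, so tbl[9] = 0x12 is picked:
                         * il4type 1 (TCP checksum) with il3type 2 (IPv4, IP
                         * header checksum left as-is).
                         */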
1942                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1943                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1944                         /*
1945                          * Lookup table to translate ol_flags to
1946                          * ol3/ol4 types.
1947                          */
1948
1949                         const uint8x16_t tbl = {
1950                                 /* [0-15] = ol4type:ol3type */
1951                                 0x00, /* none */
1952                                 0x03, /* OUTER_IP_CKSUM */
1953                                 0x02, /* OUTER_IPV4 */
1954                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1955                                 0x04, /* OUTER_IPV6 */
1956                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1957                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1958                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1959                                        * OUTER_IP_CKSUM
1960                                        */
1961                                 0x00, /* OUTER_UDP_CKSUM */
1962                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1963                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1964                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1965                                        * OUTER_IP_CKSUM
1966                                        */
1967                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1968                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1969                                        * OUTER_IP_CKSUM
1970                                        */
1971                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1972                                        * OUTER_IPV4
1973                                        */
1974                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1975                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1976                                        */
1977                         };
1978
1979                         /* Extract olflags to translate to iltypes */
1980                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1981                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1982
1983                         /*
1984                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1985                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1986                          */
1987                         const uint8x16_t shuf_mask5 = {
1988                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1989                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1990                         };
1991                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1992                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1993
1994                         /* Extract outer ol flags only */
1995                         const uint64x2_t o_cksum_mask = {
1996                                 0x1C00020000000000,
1997                                 0x1C00020000000000,
1998                         };
1999
2000                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
2001                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
2002
2003                         /* Extract OUTER_UDP_CKSUM bit 41 and
2004                          * move it to bit 61
2005                          */
2006
2007                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2008                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2009
2010                         /* Shift oltype by 2 to start nibble from BIT(56)
2011                          * instead of BIT(58)
2012                          */
2013                         xtmp128 = vshrq_n_u8(xtmp128, 2);
2014                         ytmp128 = vshrq_n_u8(ytmp128, 2);
2015                         /*
2016                          * E(48):L3_LEN(8):L2_LEN(z+7)
2017                          * E(48):L3_LEN(8):L2_LEN(z+7)
2018                          */
2019                         const int8x16_t tshft3 = {
2020                                 -1, 0, 8, 8, 8, 8, 8, 8,
2021                                 -1, 0, 8, 8, 8, 8, 8, 8,
2022                         };
2023
2024                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2025                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2026
2027                         /* Do the lookup */
2028                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2029                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2030
2031                         /* Pick only the relevant fields, i.e. bits 56:63 of oltype,
2032                          * and place them in ol3/ol4type of senddesc_w1
2033                          */
2034                         const uint8x16_t shuf_mask0 = {
2035                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2036                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2037                         };
2038
2039                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2040                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2041
2042                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
2043                          * a [E(32):E(16):OL3(8):OL2(8)]
2044                          * a = a + (a << 8)
2045                          * a [E(32):E(16):(OL3+OL2):OL2]
2046                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
2047                          */
2048                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2049                                                  vshlq_n_u16(senddesc01_w1, 8));
2050                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2051                                                  vshlq_n_u16(senddesc23_w1, 8));
2052
2053                         /* Move ltypes to senddesc*_w1 */
2054                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2055                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
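                        /* E.g. RTE_MBUF_F_TX_OUTER_IPV4 |
                         * RTE_MBUF_F_TX_OUTER_IP_CKSUM selects tbl[3] = 0x03:
                         * ol3type 3 (IPv4 with checksum) and no ol4type.
                         */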
2056                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2057                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2058                         /* Lookup table to translate ol_flags to
2059                          * ol4type, ol3type, il4type, il3type of senddesc_w1
2060                          */
2061                         const uint8x16x2_t tbl = {{
2062                                 {
2063                                         /* [0-15] = il4type:il3type */
2064                                         0x04, /* none (IPv6) */
2065                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
2066                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
2067                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
2068                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
2069                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
2070                                                * RTE_MBUF_F_TX_TCP_CKSUM
2071                                                */
2072                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2073                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2074                                                */
2075                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2076                                                * RTE_MBUF_F_TX_UDP_CKSUM
2077                                                */
2078                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
2079                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
2080                                                * RTE_MBUF_F_TX_TCP_CKSUM
2081                                                */
2082                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
2083                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2084                                                */
2085                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
2086                                                * RTE_MBUF_F_TX_UDP_CKSUM
2087                                                */
2088                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
2089                                                * RTE_MBUF_F_TX_IP_CKSUM
2090                                                */
2091                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2092                                                * RTE_MBUF_F_TX_TCP_CKSUM
2093                                                */
2094                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2095                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2096                                                */
2097                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2098                                                * RTE_MBUF_F_TX_UDP_CKSUM
2099                                                */
2100                                 },
2101
2102                                 {
2103                                         /* [16-31] = ol4type:ol3type */
2104                                         0x00, /* none */
2105                                         0x03, /* OUTER_IP_CKSUM */
2106                                         0x02, /* OUTER_IPV4 */
2107                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2108                                         0x04, /* OUTER_IPV6 */
2109                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2110                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2111                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2112                                                * OUTER_IP_CKSUM
2113                                                */
2114                                         0x00, /* OUTER_UDP_CKSUM */
2115                                         0x33, /* OUTER_UDP_CKSUM |
2116                                                * OUTER_IP_CKSUM
2117                                                */
2118                                         0x32, /* OUTER_UDP_CKSUM |
2119                                                * OUTER_IPV4
2120                                                */
2121                                         0x33, /* OUTER_UDP_CKSUM |
2122                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2123                                                */
2124                                         0x34, /* OUTER_UDP_CKSUM |
2125                                                * OUTER_IPV6
2126                                                */
2127                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2128                                                * OUTER_IP_CKSUM
2129                                                */
2130                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2131                                                * OUTER_IPV4
2132                                                */
2133                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2134                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2135                                                */
2136                                 },
2137                         }};
2138
2139                         /* Extract olflags to translate to oltype & iltype */
2140                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2141                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2142
2143                         /*
2144                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2145                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2146                          */
2147                         const uint32x4_t tshft_4 = {
2148                                 1,
2149                                 0,
2150                                 1,
2151                                 0,
2152                         };
2153                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2154                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2155
2156                         /*
2157                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2158                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2159                          */
2160                         const uint8x16_t shuf_mask5 = {
2161                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2162                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2163                         };
2164                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2165                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2166
2167                         /* Extract outer and inner header ol_flags */
2168                         const uint64x2_t oi_cksum_mask = {
2169                                 0x1CF0020000000000,
2170                                 0x1CF0020000000000,
2171                         };
2172
2173                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2174                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2175
2176                         /* Extract OUTER_UDP_CKSUM bit 41 and
2177                          * move it to bit 61
2178                          */
2179
2180                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2181                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2182
2183                         /* Shift right oltype by 2 and iltype by 4
2184                          * to start the oltype nibble at BIT(56)
2185                          * instead of BIT(58), and the iltype nibble at BIT(48)
2186                          * instead of BIT(52).
2187                          */
2188                         const int8x16_t tshft5 = {
2189                                 8, 8, 8, 8, 8, 8, -4, -2,
2190                                 8, 8, 8, 8, 8, 8, -4, -2,
2191                         };
2192
2193                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2194                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2195                         /*
2196                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2197                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2198                          */
2199                         const int8x16_t tshft3 = {
2200                                 -1, 0, -1, 0, 0, 0, 0, 0,
2201                                 -1, 0, -1, 0, 0, 0, 0, 0,
2202                         };
2203
2204                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2205                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2206
2207                         /* Set bit 4 of the oltype byte so it indexes the ol half ([16-31]) of the table */
2208                         const uint64x2_t oi_cksum_mask2 = {
2209                                 0x1000000000000000,
2210                                 0x1000000000000000,
2211                         };
2212
2213                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2214                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2215
2216                         /* Do the lookup */
2217                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2218                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2219
2220                         /* Pick only the relevant fields, i.e. bits 48:55 of iltype and
2221                          * bits 56:63 of oltype, and place them in their
2222                          * corresponding positions in senddesc_w1.
2223                          */
2224                         const uint8x16_t shuf_mask0 = {
2225                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2226                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2227                         };
2228
2229                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2230                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2231
2232                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2233                          * l3len, l2len, ol3len, ol2len.
2234                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2235                          * a = a + (a << 8)
2236                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2237                          * a = a + (a << 16)
2238                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2239                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2240                          */
2241                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2242                                                  vshlq_n_u32(senddesc01_w1, 8));
2243                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2244                                                  vshlq_n_u32(senddesc23_w1, 8));
2245
2246                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2247                         senddesc01_w1 = vaddq_u8(
2248                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2249                         senddesc23_w1 = vaddq_u8(
2250                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2251
2252                         /* Move ltypes to senddesc*_w1 */
2253                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2254                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2255                 }
2256
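                /* Gather each mbuf's aura into SEND_HDR_W0: the LD1 {.H}[n]
                 * loads below pick up the low 16 bits of the mempool's
                 * pool_id (which carry the NPA aura on cnxk pools), and the
                 * << 20 shift places them in the aura field of SEND_HDR_W0.
                 */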
2257                 xmask01 = vdupq_n_u64(0);
2258                 xmask23 = xmask01;
2259                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2260                              : [a] "+w"(xmask01)
2261                              : [in] "r"(mbuf0)
2262                              : "memory");
2263
2264                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2265                              : [a] "+w"(xmask01)
2266                              : [in] "r"(mbuf1)
2267                              : "memory");
2268
2269                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2270                              : [b] "+w"(xmask23)
2271                              : [in] "r"(mbuf2)
2272                              : "memory");
2273
2274                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2275                              : [b] "+w"(xmask23)
2276                              : [in] "r"(mbuf3)
2277                              : "memory");
2278                 xmask01 = vshlq_n_u64(xmask01, 20);
2279                 xmask23 = vshlq_n_u64(xmask23, 20);
2280
2281                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2282                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2283
2284                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2285                         /* Tx ol_flag for vlan. */
2286                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2287                         /* Bit enable for VLAN1 */
2288                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2289                         /* Tx ol_flag for QnQ. */
2290                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2291                         /* Bit enable for VLAN0 */
2292                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2293                         /* Load VLAN values from the packet; outer is VLAN 0 */
2294                         uint64x2_t ext01 = {
2295                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2296                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2297                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2298                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2299                         };
2300                         uint64x2_t ext23 = {
2301                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2302                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2303                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2304                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2305                         };
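                        /* ext01/ext23 place the outer TCI (VLAN0) at bits
                         * 23:8 and the inner TCI (VLAN1) at bits 47:32 of
                         * SEND_EXT_W1, lining up with the VLAN insert
                         * pointers (12) set in the defaults earlier.
                         */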
2306
2307                         /* Get ol_flags of the packets. */
2308                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2309                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2310
2311                         /* ORR vlan outer/inner values into cmd. */
2312                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2313                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2314
2315                         /* Test for offload enable bits and generate masks. */
2316                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2317                                                       mlv),
2318                                             vandq_u64(vtstq_u64(xtmp128, olq),
2319                                                       mlq));
2320                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2321                                                       mlv),
2322                                             vandq_u64(vtstq_u64(ytmp128, olq),
2323                                                       mlq));
2324
2325                         /* Set vlan enable bits into cmd based on mask. */
2326                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2327                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2328                 }
2329
2330                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2331                         /* Tx ol_flag for timestamp. */
2332                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2333                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
2334                         /* Set send mem alg to SUB. */
2335                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2336                         /* Increment send mem address by 8. */
2337                         const uint64x2_t addr = {0x8, 0x8};
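                        /* For packets without TSTMP the SENDMEM descriptor is
                         * kept but retargeted: the alg flips to SUB and the
                         * address moves 8 bytes past ts_mem, so the write
                         * lands in a scratch dword and the live timestamp
                         * slot stays untouched.
                         */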
2338
2339                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2340                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2341
2342                         /* Check if a timestamp is requested and generate an
2343                          * inverted mask, as no change to the default cmd
2344                          * value is needed when it is.
2345                          */
2346                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2347                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2348
2349                         /* Change send mem address to an 8 byte offset when
2350                          * TSTMP is disabled.
2351                          */
2352                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2353                                                  vandq_u64(xtmp128, addr));
2354                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2355                                                  vandq_u64(ytmp128, addr));
2356                         /* Change send mem alg to SUB when TSTMP is disabled. */
2357                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2358                                                  vandq_u64(xtmp128, alg));
2359                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2360                                                  vandq_u64(ytmp128, alg));
2361
2362                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2363                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2364                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2365                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2366                 }
2367
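                /* TSO cannot stay fully vectorized: header fix-ups depend on
                 * each packet's own headers, so the W1/EXT words are spilled
                 * to arrays, patched per mbuf, and reloaded below.
                 */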
                if (flags & NIX_TX_OFFLOAD_TSO_F) {
                        const uint64_t lso_fmt = txq->lso_tun_fmt;
                        uint64_t sx_w0[NIX_DESCS_PER_LOOP];
                        uint64_t sd_w1[NIX_DESCS_PER_LOOP];

                        /* Extract SD W1 as we need to set L4 types. */
                        vst1q_u64(sd_w1, senddesc01_w1);
                        vst1q_u64(sd_w1 + 2, senddesc23_w1);

                        /* Extract SX W0 as we need to set LSO fields. */
                        vst1q_u64(sx_w0, sendext01_w0);
                        vst1q_u64(sx_w0 + 2, sendext23_w0);

                        /* Extract ol_flags. */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        /* Prepare individual mbufs. */
                        cn10k_nix_prepare_tso(tx_pkts[0],
                                (union nix_send_hdr_w1_u *)&sd_w1[0],
                                (union nix_send_ext_w0_u *)&sx_w0[0],
                                vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);

                        cn10k_nix_prepare_tso(tx_pkts[1],
                                (union nix_send_hdr_w1_u *)&sd_w1[1],
                                (union nix_send_ext_w0_u *)&sx_w0[1],
                                vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);

                        cn10k_nix_prepare_tso(tx_pkts[2],
                                (union nix_send_hdr_w1_u *)&sd_w1[2],
                                (union nix_send_ext_w0_u *)&sx_w0[2],
                                vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);

                        cn10k_nix_prepare_tso(tx_pkts[3],
                                (union nix_send_hdr_w1_u *)&sd_w1[3],
                                (union nix_send_ext_w0_u *)&sx_w0[3],
                                vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);

                        senddesc01_w1 = vld1q_u64(sd_w1);
                        senddesc23_w1 = vld1q_u64(sd_w1 + 2);

                        sendext01_w0 = vld1q_u64(sx_w0);
                        sendext23_w0 = vld1q_u64(sx_w0 + 2);
                }

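                /* 0x80000 ORed into SEND_HDR W0 below is taken to be the
                 * don't-free (DF) bit: when cnxk_nix_prefree_seg() reports
                 * that the buffer cannot be freed (e.g. refcount > 1), DF
                 * stops NIX from freeing it after transmit. Note that
                 * vsetq_lane_u64() returns the updated vector, so its result
                 * must be assigned back into the mask. For buffers NIX does
                 * free, the mempool cookie is marked "put" in debug builds.
                 */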
                if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
                    !(flags & NIX_TX_MULTI_SEG_F) &&
                    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
                        /* Set don't free bit if reference count > 1 */
                        xmask01 = vdupq_n_u64(0);
                        xmask23 = xmask01;

                        /* Move mbufs to iova */
                        mbuf0 = (uint64_t *)tx_pkts[0];
                        mbuf1 = (uint64_t *)tx_pkts[1];
                        mbuf2 = (uint64_t *)tx_pkts[2];
                        mbuf3 = (uint64_t *)tx_pkts[3];

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
                                xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
                        else
                                RTE_MEMPOOL_CHECK_COOKIES(
                                        ((struct rte_mbuf *)mbuf0)->pool,
                                        (void **)&mbuf0, 1, 0);

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
                                xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
                        else
                                RTE_MEMPOOL_CHECK_COOKIES(
                                        ((struct rte_mbuf *)mbuf1)->pool,
                                        (void **)&mbuf1, 1, 0);

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
                                xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
                        else
                                RTE_MEMPOOL_CHECK_COOKIES(
                                        ((struct rte_mbuf *)mbuf2)->pool,
                                        (void **)&mbuf2, 1, 0);

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
                                xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
                        else
                                RTE_MEMPOOL_CHECK_COOKIES(
                                        ((struct rte_mbuf *)mbuf3)->pool,
                                        (void **)&mbuf3, 1, 0);
                        senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
                        senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
                } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
                           !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
                        /* Move mbufs to iova */
                        mbuf0 = (uint64_t *)tx_pkts[0];
                        mbuf1 = (uint64_t *)tx_pkts[1];
                        mbuf2 = (uint64_t *)tx_pkts[2];
                        mbuf3 = (uint64_t *)tx_pkts[3];

                        /* Mark mempool object as "put" since
                         * it is freed by NIX
                         */
                        RTE_MEMPOOL_CHECK_COOKIES(
                                ((struct rte_mbuf *)mbuf0)->pool,
                                (void **)&mbuf0, 1, 0);

                        RTE_MEMPOOL_CHECK_COOKIES(
                                ((struct rte_mbuf *)mbuf1)->pool,
                                (void **)&mbuf1, 1, 0);

                        RTE_MEMPOOL_CHECK_COOKIES(
                                ((struct rte_mbuf *)mbuf2)->pool,
                                (void **)&mbuf2, 1, 0);

                        RTE_MEMPOOL_CHECK_COOKIES(
                                ((struct rte_mbuf *)mbuf3)->pool,
                                (void **)&mbuf3, 1, 0);
                }

                /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
                cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
                cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
                cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
                cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

                cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
                cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
                cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
                cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

                if (flags & NIX_TX_NEED_EXT_HDR) {
                        cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
                        cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
                        cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
                        cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
                }

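                /* With security offload, packets carrying
                 * RTE_MBUF_F_TX_SEC_OFFLOAD are diverted to the CPT LMT
                 * region via cn10k_nix_prep_sec_vec(), while the rest are
                 * assigned a regular NIX LMT slot via cn10k_nix_lmt_next();
                 * either way the prepared words land at 'next' through
                 * cn10k_nix_xmit_store().
                 */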
                if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                        const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
                                                RTE_MBUF_F_TX_SEC_OFFLOAD};
                        uintptr_t next;
                        uint8_t dw;

                        /* Extract ol_flags. */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        xtmp128 = vtstq_u64(olf, xtmp128);
                        ytmp128 = vtstq_u64(olf, ytmp128);

                        /* Process mbuf0 */
                        dw = cn10k_nix_tx_dwords(flags, segdw[0]);
                        if (vgetq_lane_u64(xtmp128, 0))
                                cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
                                                       &cmd1[0], &next, c_laddr,
                                                       &c_lnum, &c_loff,
                                                       &c_shft, sa_base, flags);
                        else
                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
                                                   &shift, &wd.data128, &next);

                        /* Store mbuf0 to LMTLINE/CPT NIXTX area */
                        cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
                                             cmd0[0], cmd1[0], cmd2[0], cmd3[0],
                                             flags);

                        /* Process mbuf1 */
                        dw = cn10k_nix_tx_dwords(flags, segdw[1]);
                        if (vgetq_lane_u64(xtmp128, 1))
                                cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
                                                       &cmd1[1], &next, c_laddr,
                                                       &c_lnum, &c_loff,
                                                       &c_shft, sa_base, flags);
                        else
                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
                                                   &shift, &wd.data128, &next);

                        /* Store mbuf1 to LMTLINE/CPT NIXTX area */
                        cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
                                             cmd0[1], cmd1[1], cmd2[1], cmd3[1],
                                             flags);

                        /* Process mbuf2 */
                        dw = cn10k_nix_tx_dwords(flags, segdw[2]);
                        if (vgetq_lane_u64(ytmp128, 0))
                                cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
                                                       &cmd1[2], &next, c_laddr,
                                                       &c_lnum, &c_loff,
                                                       &c_shft, sa_base, flags);
                        else
                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
                                                   &shift, &wd.data128, &next);

                        /* Store mbuf2 to LMTLINE/CPT NIXTX area */
                        cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
                                             cmd0[2], cmd1[2], cmd2[2], cmd3[2],
                                             flags);

                        /* Process mbuf3 */
                        dw = cn10k_nix_tx_dwords(flags, segdw[3]);
                        if (vgetq_lane_u64(ytmp128, 1))
                                cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
                                                       &cmd1[3], &next, c_laddr,
                                                       &c_lnum, &c_loff,
                                                       &c_shft, sa_base, flags);
                        else
                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
                                                   &shift, &wd.data128, &next);

                        /* Store mbuf3 to LMTLINE/CPT NIXTX area */
                        cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
                                             cmd0[3], cmd1[3], cmd2[3], cmd3[3],
                                             flags);
                } else if (flags & NIX_TX_MULTI_SEG_F) {
                        uint8_t j;

                        segdw[4] = 8;
                        j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
                                                           cmd2, cmd3, segdw,
                                                           (uint64_t *)
                                                           LMT_OFF(laddr, lnum,
                                                                   0),
                                                           &wd.data128, &shift,
                                                           flags);
                        lnum += j;
                } else if (flags & NIX_TX_NEED_EXT_HDR) {
                        /* Store the prepared send desc to LMT lines */
                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
                                lnum += 1;
                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
                        } else {
                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
                                lnum += 1;
                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
                        }
                        lnum += 1;
                } else {
                        /* Store the prepared send desc to LMT lines */
                        vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
                        vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
                        lnum += 1;
                }
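                /* Note on LMT packing above: without an ext header a packet
                 * takes 32B (cmd0 + cmd1), so four packets fill one 128B LMT
                 * line; with an ext header a packet takes 48B (64B with
                 * timestamp), so two packets go per line and two lines are
                 * consumed per loop iteration.
                 */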

                if (flags & NIX_TX_MULTI_SEG_F) {
                        tx_pkts[0]->next = NULL;
                        tx_pkts[1]->next = NULL;
                        tx_pkts[2]->next = NULL;
                        tx_pkts[3]->next = NULL;
                }

                tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
        }

        /* Round up lnum to cover the last LMT line if it is partially
         * filled, and record that line's size ((loff >> 4) - 1, in 16B
         * units) in the 3-bit field selected by 'shift'.
         */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                lnum = lnum + !!loff;
                wd.data128 = wd.data128 |
                        (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
        }

        if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
                wd.data[0] >>= 16;

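        /* For VWQE, wait until this HWS is at the head of its ordering
         * chain (roc_sso_hws_head_wait()) before issuing the LMTST, which
         * appears to preserve SSO ordering for the event-driven Tx path.
         */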
        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);

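        /* STEOR data word layout as used here: the low 3 bits carry the
         * first LMT line's size and are folded into I/O address bits [6:4],
         * bits [19:12] carry (number of lines - 1), and the low bits of the
         * word carry the LMT ID. A single STEOR covers at most 16 lines, so
         * bursts above that are split across STEOR0/STEOR1.
         */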
        /* Trigger LMTST */
        if (lnum > 16) {
                if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
                        wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

                pa = io_addr | (wd.data[0] & 0x7) << 4;
                wd.data[0] &= ~0x7ULL;

                if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
                        wd.data[0] <<= 16;

                wd.data[0] |= (15ULL << 12);
                wd.data[0] |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(wd.data[0], pa);

                if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
                        wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

                pa = io_addr | (wd.data[1] & 0x7) << 4;
                wd.data[1] &= ~0x7ULL;

                if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
                        wd.data[1] <<= 16;

                wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
                wd.data[1] |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(wd.data[1], pa);
        } else if (lnum) {
                if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
                        wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

                pa = io_addr | (wd.data[0] & 0x7) << 4;
                wd.data[0] &= ~0x7ULL;

                if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
                        wd.data[0] <<= 16;

                wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
                wd.data[0] |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(wd.data[0], pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        if (unlikely(scalar)) {
                if (flags & NIX_TX_MULTI_SEG_F)
                        pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
                                                         scalar, cmd, flags);
                else
                        pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
                                                    scalar, cmd, flags);
        }

        return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
                           struct rte_mbuf **tx_pkts, uint16_t pkts,
                           uint64_t *cmd, const uint16_t flags)
{
        RTE_SET_USED(ws);
        RTE_SET_USED(tx_queue);
        RTE_SET_USED(tx_pkts);
        RTE_SET_USED(pkts);
        RTE_SET_USED(cmd);
        RTE_SET_USED(flags);
        return 0;
}
#endif

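/* Short aliases for the Tx offload flags used by the fast-path mode tables
 * below.
 */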
#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F
#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
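/* Each T(name, sz, flags) entry gives one specialized Tx function's name
 * suffix, the size in 64-bit words of the cmd[] scratch it needs (8 when
 * TSP_F is present, else 6), and the offload flag combination it is built
 * for.
 */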
#define NIX_TX_FASTPATH_MODES_0_15                                             \
        T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
        T(l3l4csum, 6, L3L4CSUM_F)                                             \
        T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
        T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
        T(vlan, 6, VLAN_F)                                                     \
        T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
        T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
        T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
        T(noff, 6, NOFF_F)                                                     \
        T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
        T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
        T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
        T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
        T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
        T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
        T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
          NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_16_31                                            \
        T(tso, 6, TSO_F)                                                       \
        T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
        T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
        T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
        T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
        T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
        T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
        T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
          TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
        T(tso_noff, 6, TSO_F | NOFF_F)                                         \
        T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
        T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
        T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
          TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
        T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
        T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
        T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
        T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
          TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_32_47                                            \
        T(ts, 8, TSP_F)                                                        \
        T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
        T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
        T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
        T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
        T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
        T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
        T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
          TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
        T(ts_noff, 8, TSP_F | NOFF_F)                                          \
        T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
        T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
        T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
          TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
        T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
        T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
        T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
        T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
          TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_48_63                                            \
        T(ts_tso, 8, TSP_F | TSO_F)                                            \
        T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
        T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
        T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
          TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
        T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
        T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
        T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
        T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
          TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
        T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
        T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
        T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
        T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
          TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
        T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
        T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
          TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
        T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
          TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
        T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
          TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_64_79                                            \
        T(sec, 6, T_SEC_F)                                                     \
        T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
        T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
        T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
        T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
        T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
        T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
        T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
          T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
        T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
        T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
        T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
        T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
          T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
        T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
        T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
        T(sec_noff_vlan_ol3ol4csum, 6,                                         \
          T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
        T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
          T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_80_95                                            \
        T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
        T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
        T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
        T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
          T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
        T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
        T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
        T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
        T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
          T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
        T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
        T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
        T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
        T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
          T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
        T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
        T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
          T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
        T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
          T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
        T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
          T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_96_111                                           \
        T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
        T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
        T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
        T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
          T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
        T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
        T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
        T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
        T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
          T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
        T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
        T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
        T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
        T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
          T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
        T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
        T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
          T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
        T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
          T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
        T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
          T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_112_127                                          \
        T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
        T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
        T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
        T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
          T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
        T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
        T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
          T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
        T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
          T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
        T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
          T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
        T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
        T(sec_ts_tso_noff_l3l4csum, 8,                                         \
          T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
        T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
          T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
        T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
          T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
        T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
        T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
          T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
        T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
          T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
        T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
          T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
                  L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES                                                  \
        NIX_TX_FASTPATH_MODES_0_15                                             \
        NIX_TX_FASTPATH_MODES_16_31                                            \
        NIX_TX_FASTPATH_MODES_32_47                                            \
        NIX_TX_FASTPATH_MODES_48_63                                            \
        NIX_TX_FASTPATH_MODES_64_79                                            \
        NIX_TX_FASTPATH_MODES_80_95                                            \
        NIX_TX_FASTPATH_MODES_96_111                                           \
        NIX_TX_FASTPATH_MODES_112_127

#define T(name, sz, flags)                                                     \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T

#define NIX_TX_XMIT(fn, sz, flags)                                             \
        uint16_t __rte_noinline __rte_hot fn(                                  \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
        {                                                                      \
                uint64_t cmd[sz];                                              \
                /* For TSO, inner checksum is a must */                        \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
                return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
                                           flags);                             \
        }

#define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
        uint16_t __rte_noinline __rte_hot fn(                                  \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
        {                                                                      \
                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
                /* For TSO, inner checksum is a must */                        \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
                return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
                                                cmd,                           \
                                                flags | NIX_TX_MULTI_SEG_F);   \
        }

#define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
        uint16_t __rte_noinline __rte_hot fn(                                  \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
        {                                                                      \
                uint64_t cmd[sz];                                              \
                /* For TSO, inner checksum is a must */                        \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
                return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
                                                  pkts, cmd, (flags));         \
        }

#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
        uint16_t __rte_noinline __rte_hot fn(                                  \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
        {                                                                      \
                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
                /* For TSO, inner checksum is a must */                        \
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
                return cn10k_nix_xmit_pkts_vector(                             \
                        tx_queue, NULL, tx_pkts, pkts, cmd,                    \
                        (flags) | NIX_TX_MULTI_SEG_F);                         \
        }
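
/* Illustrative (not part of this header): a build unit can instantiate the
 * declared fast-path functions by re-defining T() over the mode table, e.g.:
 *
 *   #define T(name, sz, flags)                                               \
 *           NIX_TX_XMIT(cn10k_nix_xmit_pkts_##name, sz, flags)
 *   NIX_TX_FASTPATH_MODES
 *   #undef T
 *
 * and similarly with NIX_TX_XMIT_MSEG / NIX_TX_XMIT_VEC /
 * NIX_TX_XMIT_VEC_MSEG for the mseg and vector variants.
 */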

#endif /* __CN10K_TX_H__ */