drivers/net/cnxk/cn10k_tx.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flags to control the xmit_prepare function.
 * Defined from the MSB end to denote that they are not
 * used as offload flags when picking the transmit function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low; update fc_cache_pkts */                \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply by sqes_per_sqb to express in pkts */      \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)

/* Macro converting an encoded number of segments to a number of dwords;
 * each nb_segs value is encoded as a 4-bit nibble.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

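/* Worked example (annotation, not part of the original header): each
 * nibble of NIX_SEGDW_MAGIC is the number of 16B LMT dwords an SG list
 * with that many segments occupies, given that one SG sub-descriptor
 * header covers up to three pointers. E.g. NIX_NB_SEGS_TO_SEGDW(4) =
 * (0x76654432210ULL >> 16) & 0xF = 3: SG hdr + 3 ptrs + SG hdr + 1 ptr
 * = 6 words = 3 dwords.
 */
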
/* Function to determine the number of Tx sub-descriptors required when
 * the ext sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                       2 :
                       ((flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F |
                                  NIX_TX_OFFLOAD_TSO_F)) ?
                                1 :
                                0);
}

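/* Clarifying note (added): timestamping costs two extra sub-descriptors
 * (SEND_EXT plus a SEND_MEM for the timestamp write-back), VLAN/QinQ
 * insertion or TSO costs one (SEND_EXT only), and all other offloads
 * cost none.
 */
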
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Already everything is accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                       8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

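/* Worked example (annotation): with only SEND_EXT needed, dw_m1 = 2.
 * The caller later folds the low 3 bits (first LMT line's size - 1, in
 * 16B units) into bits [6:4] of the I/O address, while the 3-bit fields
 * at bits 19, 22, ..., 61 describe LMT lines 1..15 of a 16-line burst.
 */
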
static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                        ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                        4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags, const uint16_t static_sz)
{
        if (static_sz)
                cmd[0] = txq->send_hdr_w0;
        else
                cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                        cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
                else
                        cmd[2] = NIX_SUBDC_EXT << 60;
                cmd[3] = 0;
                cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        } else {
                cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
}

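/* Annotation (added): the skeleton above lays out cmd[] as SEND_HDR in
 * cmd[0..1], SEND_EXT in cmd[2..3] when NIX_TX_NEED_EXT_HDR is set, then
 * a SEND_SG word with segs = 1 pre-set (BIT_ULL(48)). With static_sz ==
 * 0 (the VWQE case), sizem1 is recomputed from the flags and patched
 * into bits [42:40] of SEND_HDR W0 instead of being taken from txq.
 */
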
static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update LMT line usage for a partially filled last line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e. 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Update lines - 1 that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}

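/* Worked example (annotation): with lnum = 1 and loff = 1, i.e. three
 * CPT instructions where the last line is half filled, shft has reached
 * 19, so the field at bits [21:19] is rewritten from 0x7 (full 128B
 * line) to 0x3 (64B), the first line's size moves from bits [18:16]
 * into PA bits [6:4], and lines - 1 = lnum + loff - 1 = 1 is placed at
 * bit 12.
 */
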
#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint8_t l3l4type, chksum;
        uint64x2_t cmd01, cmd23;
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                /* Extract l3l4type either from il3il4type or ol3ol4type */
                if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
                    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                        l2_len = vgetq_lane_u8(*cmd0, 10);
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = vgetq_lane_u8(*cmd0, 11) - l2_len;
                        l3l4type = vgetq_lane_u8(*cmd0, 13);
                } else {
                        l2_len = vgetq_lane_u8(*cmd0, 8);
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = vgetq_lane_u8(*cmd0, 9) - l2_len;
                        l3l4type = vgetq_lane_u8(*cmd0, 12);
                }

                chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
                chksum = ~chksum;
                sess_priv.chksum = sess_priv.chksum & chksum;
                /* Clear SEND header flags */
                *cmd0 = vsetq_lane_u16(0, *cmd0, 6);
        } else {
                l2_len = m->l2_len;
                l3_len = m->l3_len;
        }

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        /* Exclude l3 len from roundup for transport mode */
        dlen_adj -= sess_priv.mode ? 0 : l3_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
                        ((uint64_t)sess_priv.chksum) << 32 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }

        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

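/* Worked example (annotation, values are illustrative only): for tunnel
 * mode with pkt_len = 1400 and l2_len = 14, an SA with roundup_byte =
 * 16, roundup_len = 2 and partial_len = 30 gives dlen_adj = 1386,
 * rlen = roundup(1386 + 2, 16) + 30 = 1392 + 30 = 1422, so dlen_adj
 * becomes 1422 - 1386 = 36 bytes of post-encryption growth written back
 * into the send descriptors above.
 */
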
static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint8_t l3l4type, chksum;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint16_t tag;
        uint64_t sa;

        /* Move to our line from base */
        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                /* Extract l3l4type either from il3il4type or ol3ol4type */
                if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
                    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                        l2_len = (cmd[1] >> 16) & 0xFF;
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = ((cmd[1] >> 24) & 0xFF) - l2_len;
                        l3l4type = (cmd[1] >> 40) & 0xFF;
                } else {
                        l2_len = cmd[1] & 0xFF;
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = ((cmd[1] >> 8) & 0xFF) - l2_len;
                        l3l4type = (cmd[1] >> 32) & 0xFF;
                }

                chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
                chksum = ~chksum;
                sess_priv.chksum = sess_priv.chksum & chksum;
                /* Clear SEND header flags */
                cmd[1] &= ~(0xFFFFUL << 32);
        } else {
                l2_len = m->l2_len;
                l3_len = m->l3_len;
        }

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        /* Exclude l3 len from roundup for transport mode */
        dlen_adj -= sess_priv.mode ? 0 : l3_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
                        ((uint64_t)sess_priv.chksum) << 32 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}

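/* Annotation (added): the `(2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6))`
 * term above resolves to byte offset 2 (IPv4 Total Length) or 4 (IPv6
 * Payload Length) within the IP header, which is the field that must
 * shrink by `paylen` so the LSO segment header template carries a
 * coherent length.
 */
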
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
                       uint64_t mark_fmt)
{
        uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;
        uint16_t mark_form = 0;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w0.mark_en = 0;
                send_hdr_ext->w1.u = 0;
                ol_flags = m->ol_flags;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum, L4 csum or LSO.
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment by 1 for IPv4 with checksum, since L3TYPE 3
                 * is IPv4 + csum.
                 */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* In case there is no tunnel header, shift the IL3/IL4
                 * fields into the OL3/OL4 positions so that OL3/OL4 are
                 * used for the header checksums.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment by 1 for IPv4 with checksum, since L3TYPE 3
                 * is IPv4 + csum.
                 */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only one header is
                 * present.
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment by 1 for IPv4 with checksum, since L3TYPE 3
                 * is IPv4 + csum.
                 */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
                                                  RTE_MBUF_F_TX_IPV6));

                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
                /* Fill for VLAN marking only when VLAN insertion is enabled */
                mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
                             (send_hdr_ext->w1.vlan1_ins_ena ||
                              send_hdr_ext->w1.vlan0_ins_ena));

                /* Mask requested flags with packet data information */
                mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
                mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

                mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
                mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
                markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

                send_hdr_ext->w0.mark_en = !!mark_off;
                send_hdr_ext->w0.markform = mark_form & 0x7F;
                send_hdr_ext->w0.markptr = markptr;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent
                         * mbuf is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}

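/* Annotation on the dual-checksum path above: when no outer header
 * exists (ol3type == 0), mask = 1 and the W1 rewrite
 *   w1.u = ((w1.u & 0xFFFFFFFF00000000) >> 8) |
 *          ((w1.u & 0x00000000FFFFFFFF) >> 16);
 * slides IL3PTR/IL4PTR (bits [31:16]) into OL3PTR/OL4PTR (bits [15:0])
 * and IL3TYPE/IL4TYPE (bits [47:40]) into OL3TYPE/OL4TYPE (bits
 * [39:32]), so the single-header case reuses the outer checksum fields.
 */
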
static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out
         * to registers. In other cases, 'cmd' will be on the stack. The
         * intent is that 'cmd' stores content from txq->cmd which is
         * copied only once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp =
                        !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set
                 * must not have their Tx timestamp recorded; hence change
                 * the alg type to NIX_SENDMEMALG_SUB and move the send mem
                 * addr field to the next 8 bytes so that the registered Tx
                 * tstamp address is not corrupted.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg =
                        NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
        }
}

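/* Annotation (added): per the comment above, when the mbuf does not
 * request a timestamp, is_ol_tstamp = 1, so the alg becomes
 * NIX_SENDMEMALG_SETTSTMP + 8 (the SUB family) and addr is redirected
 * to ts_mem[1], a scratch slot, keeping ts_mem[0], which holds the last
 * real Tx timestamp, intact.
 */
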
static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}

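/* Worked example (annotation): with NIX_TX_NEED_EXT_HDR and a 5-segment
 * mbuf chain, the SG area ends up as cmd[4] = SG hdr (segs 1-3),
 * cmd[5..7] = ptrs, cmd[8] = SG hdr (segs 4-5), cmd[9..10] = ptrs, i.e.
 * 7 words past &cmd[2 + off], rounded up to 4 dword pairs; adding
 * (off >> 1) + 1 = 2 for HDR + EXT gives segdw = 6 (7 with TSTAMP).
 */
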
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        uint64_t lso_tun_fmt = 0;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        rte_iova_t c_io_addr;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

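/* Annotation (added): a 32-packet burst occupies up to 32 LMT lines,
 * but a single LMTST can flag at most 16 lines, hence the burst > 16
 * split above: STEOR0 fires lines 0-15 (count field 15) and STEOR1
 * fires the remaining burst - 16 lines starting at lmt_id + 16 (count
 * field burst - 17).
 */
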
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
                         struct rte_mbuf **tx_pkts, uint16_t pkts,
                         uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t lso_tun_fmt = 0;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              segdw, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

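/* Annotation (added): in this multi-seg path, each line's size - 1 is
 * accumulated in data128 in 3-bit steps starting at bit 16. data0 >>= 16
 * followed by data0 <<= 16 re-aligns the fields for lines 1..15 to bit
 * 19 once the first line's size has been folded into the PA, mirroring
 * the layout cn10k_nix_tx_steor_data() produces for the fixed-size path.
 */
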
#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                      const uint64_t flags, const uint64_t lso_tun_fmt)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                        0x1;
                uint8_t shift = is_udp_tun ? 32 : 0;

                shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format = (lso_tun_fmt >> shift);
        }
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                union nix_send_hdr_w0_u *sh,
                                union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                           uint64x2_t *cmd1, const uint8_t segdw,
                           const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return;
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        sh.sizem1 = segdw - 1;
        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}

#define NIX_DESCS_PER_LOOP 4

1331 static __rte_always_inline uint8_t
1332 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1333                                uint64x2_t *cmd1, uint64x2_t *cmd2,
1334                                uint64x2_t *cmd3, uint8_t *segdw,
1335                                uint64_t *lmt_addr, __uint128_t *data128,
1336                                uint8_t *shift, const uint16_t flags)
1337 {
1338         uint8_t j, off, lmt_used;
1339
1340         if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1341             !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1342                 /* No segments in 4 consecutive packets. */
1343                 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1344                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1345                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1346                                                            &cmd0[j], &cmd1[j],
1347                                                            segdw[j], flags);
1348                         vst1q_u64(lmt_addr, cmd0[0]);
1349                         vst1q_u64(lmt_addr + 2, cmd1[0]);
1350                         vst1q_u64(lmt_addr + 4, cmd0[1]);
1351                         vst1q_u64(lmt_addr + 6, cmd1[1]);
1352                         vst1q_u64(lmt_addr + 8, cmd0[2]);
1353                         vst1q_u64(lmt_addr + 10, cmd1[2]);
1354                         vst1q_u64(lmt_addr + 12, cmd0[3]);
1355                         vst1q_u64(lmt_addr + 14, cmd1[3]);
1356
1357                         *data128 |= ((__uint128_t)7) << *shift;
1358                         *shift += 3;
1359
1360                         return 1;
1361                 }
1362         }
1363
1364         lmt_used = 0;
1365         for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1366                 /* Fit consecutive packets in same LMTLINE. */
1367                 if ((segdw[j] + segdw[j + 1]) <= 8) {
1368                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1369                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1370                                                            &cmd0[j], &cmd1[j],
1371                                                            segdw[j], flags);
1372                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1373                                                            &cmd0[j + 1],
1374                                                            &cmd1[j + 1],
1375                                                            segdw[j + 1], flags);
1376                                 /* TSTAMP takes 4 words each; no room for segs. */
1377                                 vst1q_u64(lmt_addr, cmd0[j]);
1378                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1379                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1380                                 vst1q_u64(lmt_addr + 6, cmd3[j]);
1381
1382                                 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1383                                 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1384                                 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1385                                 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1386                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1387                                 /* EXT header takes 3 words each, leaving space for 2 segs. */
1388                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1389                                                            lmt_addr + 6,
1390                                                            &cmd0[j], &cmd1[j],
1391                                                            segdw[j], flags);
1392                                 vst1q_u64(lmt_addr, cmd0[j]);
1393                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1394                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1395                                 off = segdw[j] - 3;
1396                                 off <<= 1;
1397                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1398                                                            lmt_addr + 12 + off,
1399                                                            &cmd0[j + 1],
1400                                                            &cmd1[j + 1],
1401                                                            segdw[j + 1], flags);
1402                                 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1403                                 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1404                                 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1405                         } else {
1406                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1407                                                            lmt_addr + 4,
1408                                                            &cmd0[j], &cmd1[j],
1409                                                            segdw[j], flags);
1410                                 vst1q_u64(lmt_addr, cmd0[j]);
1411                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1412                                 off = segdw[j] - 2;
1413                                 off <<= 1;
1414                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1415                                                            lmt_addr + 8 + off,
1416                                                            &cmd0[j + 1],
1417                                                            &cmd1[j + 1],
1418                                                            segdw[j + 1], flags);
1419                                 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1420                                 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1421                         }
1422                         *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1423                                     << *shift;
1424                         *shift += 3;
1425                         j += 2;
1426                 } else {
1427                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
1428                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1429                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1430                                                            lmt_addr + 6,
1431                                                            &cmd0[j], &cmd1[j],
1432                                                            segdw[j], flags);
1433                                 vst1q_u64(lmt_addr, cmd0[j]);
1434                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1435                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1436                                 off = segdw[j] - 4;
1437                                 off <<= 1;
1438                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1439                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1440                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1441                                                            lmt_addr + 6,
1442                                                            &cmd0[j], &cmd1[j],
1443                                                            segdw[j], flags);
1444                                 vst1q_u64(lmt_addr, cmd0[j]);
1445                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1446                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1447                         } else {
1448                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1449                                                            lmt_addr + 4,
1450                                                            &cmd0[j], &cmd1[j],
1451                                                            segdw[j], flags);
1452                                 vst1q_u64(lmt_addr, cmd0[j]);
1453                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1454                         }
1455                         *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1456                         *shift += 3;
1457                         j++;
1458                 }
1459                 lmt_used++;
1460                 lmt_addr += 16;
1461         }
1462
1463         return lmt_used;
1464 }
1465
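/* Reserve 'dw' 16B words in the current LMT line and return its address
 * in 'next'. If the 128B LMTLINE cannot fit them, close the current line
 * (recording its size-minus-one in *data128) and move to the next one.
 */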
1466 static __rte_always_inline void
1467 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1468                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1469 {
1470         /* Go to next line if we are out of space */
1471         if ((*loff + (dw << 4)) > 128) {
1472                 *data128 = *data128 |
1473                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1474                 *shift = *shift + 3;
1475                 *loff = 0;
1476                 *lnum = *lnum + 1;
1477         }
1478
1479         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1480         *loff = *loff + (dw << 4);
1481 }
1482
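/* Store one packet's prepared sub-descriptors to its LMT line in the
 * order the NIX expects: SEND HDR (cmd0), then EXT (cmd2), SG (cmd1)
 * and MEM (cmd3) when those sub-descriptors are enabled.
 */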
1483 static __rte_always_inline void
1484 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1485                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1486                      uint64x2_t cmd3, const uint16_t flags)
1487 {
1488         uint8_t off;
1489
1490         /* Handle no-fast-free when security is enabled without multi-seg */
1491         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1492             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1493             !(flags & NIX_TX_MULTI_SEG_F)) {
1494                 union nix_send_sg_s sg;
1495
1496                 sg.u = vgetq_lane_u64(cmd1, 0);
1497                 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1498                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1499
1500 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1501                 sg.u = vgetq_lane_u64(cmd1, 0);
1502                 if (!(sg.u & (1ULL << 55)))
1503                         RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1504                                                 0);
1505                 rte_io_wmb();
1506 #endif
1507         }
1508         if (flags & NIX_TX_MULTI_SEG_F) {
1509                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1510                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1511                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1512                                                    &cmd0, &cmd1, segdw, flags);
1513                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1514                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1515                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1516                         off = segdw - 4;
1517                         off <<= 4;
1518                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1519                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1520                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1521                                                    &cmd0, &cmd1, segdw, flags);
1522                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1523                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1524                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1525                 } else {
1526                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1527                                                    &cmd0, &cmd1, segdw, flags);
1528                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1529                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1530                 }
1531         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1532                 /* Store the prepared send desc to LMT lines */
1533                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1534                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1535                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1536                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1537                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1538                 } else {
1539                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1540                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1541                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1542                 }
1543         } else {
1544                 /* Store the prepared send desc to LMT lines */
1545                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1546                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1547         }
1548 }
1549
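/* Vector Tx burst: prepare NIX_DESCS_PER_LOOP (4) packets per iteration
 * using NEON and submit them via burst LMTST; any sub-4 remainder is
 * handed to the scalar xmit path.
 */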
1550 static __rte_always_inline uint16_t
1551 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1552                            struct rte_mbuf **tx_pkts, uint16_t pkts,
1553                            uint64_t *cmd, const uint16_t flags)
1554 {
1555         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1556         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1557         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1558                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1559         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1560         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1561         uint64x2_t senddesc01_w0, senddesc23_w0;
1562         uint64x2_t senddesc01_w1, senddesc23_w1;
1563         uint64x2_t sendext01_w0, sendext23_w0;
1564         uint64x2_t sendext01_w1, sendext23_w1;
1565         uint64x2_t sendmem01_w0, sendmem23_w0;
1566         uint64x2_t sendmem01_w1, sendmem23_w1;
1567         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1568         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1569         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1570         struct cn10k_eth_txq *txq = tx_queue;
1571         rte_iova_t io_addr = txq->io_addr;
1572         uintptr_t laddr = txq->lmt_base;
1573         uint8_t c_lnum, c_shft, c_loff;
1574         uint64x2_t ltypes01, ltypes23;
1575         uint64x2_t xtmp128, ytmp128;
1576         uint64x2_t xmask01, xmask23;
1577         uintptr_t c_laddr = laddr;
1578         uint8_t lnum, shift, loff;
1579         rte_iova_t c_io_addr;
1580         uint64_t sa_base;
1581         union wdata {
1582                 __uint128_t data128;
1583                 uint64_t data[2];
1584         } wd;
1585
1586         if (!(flags & NIX_TX_VWQE_F)) {
1587                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1588                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1589                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1590                 /* Reduce the cached count */
1591                 txq->fc_cache_pkts -= pkts;
1592         } else {
1593                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1594                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1595         }
1596
1597         /* Perform header writes before barrier for TSO */
1598         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1599                 for (i = 0; i < pkts; i++)
1600                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1601         }
1602
1603         if (!(flags & NIX_TX_VWQE_F)) {
1604                 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1605         } else {
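                     /* For VWQE, keep only the SQ-related upper bits of the
                      * stashed SEND HDR W0 and set a fixed sizem1 derived
                      * from the enabled offloads (ext sub-descriptors + 1).
                      */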
1606                 uint64_t w0 =
1607                         (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1608                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1609
1610                 senddesc01_w0 = vdupq_n_u64(w0);
1611         }
1612         senddesc23_w0 = senddesc01_w0;
1613
1614         senddesc01_w1 = vdupq_n_u64(0);
1615         senddesc23_w1 = senddesc01_w1;
1616         sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1617         sgdesc23_w0 = sgdesc01_w0;
1618
1619         if (flags & NIX_TX_NEED_EXT_HDR) {
1620                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1621                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1622                                                    BIT_ULL(15));
1623                         sendmem01_w0 =
1624                                 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1625                                             (NIX_SENDMEMALG_SETTSTMP << 56));
1626                         sendmem23_w0 = sendmem01_w0;
1627                         sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1628                         sendmem23_w1 = sendmem01_w1;
1629                 } else {
1630                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1631                 }
1632                 sendext23_w0 = sendext01_w0;
1633
1634                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
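                             /* Both VLAN insert pointers default to byte
                              * offset 12, i.e. right after the Ethernet
                              * DMAC + SMAC.
                              */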
1635                         sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1636                 else
1637                         sendext01_w1 = vdupq_n_u64(0);
1638                 sendext23_w1 = sendext01_w1;
1639         }
1640
1641         /* Get LMT base address and LMT ID as lcore id */
1642         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1643         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1644                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1645                 c_io_addr = txq->cpt_io_addr;
1646                 sa_base = txq->sa_base;
1647         }
1648
1649         left = pkts;
1650 again:
1651         /* Number of packets to prepare depends on offloads enabled. */
1652         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1653                               cn10k_nix_pkts_per_vec_brst(flags) :
1654                               left;
1655         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
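                     /* Per-LMT-line size fields are packed as 3-bit values
                      * starting at bit 16 of the LMTST data word; bits 15:0
                      * are used later for the LMT id and line count.
                      */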
1656                 wd.data128 = 0;
1657                 shift = 16;
1658         }
1659         lnum = 0;
1660         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1661                 loff = 0;
1662                 c_loff = 0;
1663                 c_lnum = 0;
1664                 c_shft = 16;
1665         }
1666
1667         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
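                     /* A CPT LMT burst is limited to 16 lines; stop early
                      * if this iteration could exceed that.
                      */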
1668                 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1669                         burst = i;
1670                         break;
1671                 }
1672
1673                 if (flags & NIX_TX_MULTI_SEG_F) {
1674                         uint8_t j;
1675
1676                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1677                                 struct rte_mbuf *m = tx_pkts[j];
1678
1679                                 /* Get dwords based on nb_segs. */
1680                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1681                                 /* Add dwords based on offloads. */
1682                                 segdw[j] += 1 + /* SEND HDR */
1683                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1684                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1685                         }
1686
1687                         /* Check if there are enough LMTLINES for this loop */
1688                         if (lnum + 4 > 32) {
1689                                 uint8_t ldwords_con = 0, lneeded = 0;
1690                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1691                                         ldwords_con += segdw[j];
1692                                         if (ldwords_con > 8) {
1693                                                 lneeded += 1;
1694                                                 ldwords_con = segdw[j];
1695                                         }
1696                                 }
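                                 /* Account for the last, partially filled LMTLINE. */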
1697                                 lneeded += 1;
1698                                 if (lnum + lneeded > 32) {
1699                                         burst = i;
1700                                         break;
1701                                 }
1702                         }
1703                 }
1704                 /* Clear the lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
1705                 senddesc01_w0 =
1706                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1707                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1708
1709                 senddesc23_w0 = senddesc01_w0;
1710                 sgdesc23_w0 = sgdesc01_w0;
1711
1712                 /* Clear vlan enables. */
1713                 if (flags & NIX_TX_NEED_EXT_HDR) {
1714                         sendext01_w1 = vbicq_u64(sendext01_w1,
1715                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1716                         sendext23_w1 = sendext01_w1;
1717                 }
1718
1719                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1720                         /* Reset send mem alg to SETTSTMP from SUB. */
1721                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1722                                                  vdupq_n_u64(BIT_ULL(59)));
1723                         /* Reset send mem address to default. */
1724                         sendmem01_w1 =
1725                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1726                         sendmem23_w0 = sendmem01_w0;
1727                         sendmem23_w1 = sendmem01_w1;
1728                 }
1729
1730                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1731                         /* Clear the LSO enable bit. */
1732                         sendext01_w0 = vbicq_u64(sendext01_w0,
1733                                                  vdupq_n_u64(BIT_ULL(14)));
1734                         sendext23_w0 = sendext01_w0;
1735                 }
1736
1737                 /* Advance mbuf pointers to the buf_iova field */
1738                 mbuf0 = (uint64_t *)tx_pkts[0];
1739                 mbuf1 = (uint64_t *)tx_pkts[1];
1740                 mbuf2 = (uint64_t *)tx_pkts[2];
1741                 mbuf3 = (uint64_t *)tx_pkts[3];
1742
1743                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1744                                      offsetof(struct rte_mbuf, buf_iova));
1745                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1746                                      offsetof(struct rte_mbuf, buf_iova));
1747                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1748                                      offsetof(struct rte_mbuf, buf_iova));
1749                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1750                                      offsetof(struct rte_mbuf, buf_iova));
1751                 /*
1752                  * Get the mbufs' ol_flags, iova, pktlen, dataoff:
1753                  * dataoff_iovaX.D[0] = iova,
1754                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff,
1755                  * len_olflagsX.D[0] = ol_flags,
1756                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1757                  */
1758                 dataoff_iova0 = vld1q_u64(mbuf0);
1759                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1760                 dataoff_iova1 = vld1q_u64(mbuf1);
1761                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1762                 dataoff_iova2 = vld1q_u64(mbuf2);
1763                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1764                 dataoff_iova3 = vld1q_u64(mbuf3);
1765                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1766
1767                 /* Move mbufs to point to pool */
1768                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1769                                      offsetof(struct rte_mbuf, pool) -
1770                                      offsetof(struct rte_mbuf, buf_iova));
1771                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1772                                      offsetof(struct rte_mbuf, pool) -
1773                                      offsetof(struct rte_mbuf, buf_iova));
1774                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1775                                      offsetof(struct rte_mbuf, pool) -
1776                                      offsetof(struct rte_mbuf, buf_iova));
1777                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1778                                      offsetof(struct rte_mbuf, pool) -
1779                                      offsetof(struct rte_mbuf, buf_iova));
1780
1781                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1782                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1783                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1784                         /*
1785                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1786                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1787                          */
1788
1789                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1790                                      : [a] "+w"(senddesc01_w1)
1791                                      : [in] "r"(mbuf0 + 2)
1792                                      : "memory");
1793
1794                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1795                                      : [a] "+w"(senddesc01_w1)
1796                                      : [in] "r"(mbuf1 + 2)
1797                                      : "memory");
1798
1799                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1800                                      : [b] "+w"(senddesc23_w1)
1801                                      : [in] "r"(mbuf2 + 2)
1802                                      : "memory");
1803
1804                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1805                                      : [b] "+w"(senddesc23_w1)
1806                                      : [in] "r"(mbuf3 + 2)
1807                                      : "memory");
1808
1809                         /* Get pool pointer alone */
1810                         mbuf0 = (uint64_t *)*mbuf0;
1811                         mbuf1 = (uint64_t *)*mbuf1;
1812                         mbuf2 = (uint64_t *)*mbuf2;
1813                         mbuf3 = (uint64_t *)*mbuf3;
1814                 } else {
1815                         /* Get pool pointer alone */
1816                         mbuf0 = (uint64_t *)*mbuf0;
1817                         mbuf1 = (uint64_t *)*mbuf1;
1818                         mbuf2 = (uint64_t *)*mbuf2;
1819                         mbuf3 = (uint64_t *)*mbuf3;
1820                 }
1821
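                     /* Shuffle mask to pick the low 16 bits of pkt_len
                      * (bytes 4:5 and 12:13) into bits 15:0 of each
                      * 64-bit lane.
                      */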
1822                 const uint8x16_t shuf_mask2 = {
1823                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1824                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1825                 };
1826                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1827                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1828
1829                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1830                 const uint64x2_t and_mask0 = {
1831                         0xFFFFFFFFFFFFFFFF,
1832                         0x000000000000FFFF,
1833                 };
1834
1835                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1836                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1837                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1838                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1839
1840                 /*
1841                  * Pick only 16 bits of pktlen present at bits 63:32
1842                  * and place them at bits 15:0.
1843                  */
1844                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1845                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1846
1847                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1848                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1849                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1850
1851                 /* OR both sgdesc_w0 and senddesc_w0 with the 16 bits of
1852                  * pktlen at bit positions 15:0.
1853                  */
1854                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1855                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1856                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1857                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1858
1859                 /* Move mbuf to point to pool_id. */
1860                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1861                                      offsetof(struct rte_mempool, pool_id));
1862                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1863                                      offsetof(struct rte_mempool, pool_id));
1864                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1865                                      offsetof(struct rte_mempool, pool_id));
1866                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1867                                      offsetof(struct rte_mempool, pool_id));
1868
1869                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1870                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1871                         /*
1872                          * Lookup table to translate ol_flags to il3/il4 types.
1873                          * We still use the ol3/ol4 type fields in senddesc_w1,
1874                          * since only single-header processing is enabled.
1875                          */
1876                         const uint8x16_t tbl = {
1877                                 /* [0-15] = il4type:il3type */
1878                                 0x04, /* none (IPv6 assumed) */
1879                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1880                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1881                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1882                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1883                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1884                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1885                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1886                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1887                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1888                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1889                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1890                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1891                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1892                                        * RTE_MBUF_F_TX_TCP_CKSUM
1893                                        */
1894                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1895                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1896                                        */
1897                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1898                                        * RTE_MBUF_F_TX_UDP_CKSUM
1899                                        */
1900                         };
1901
1902                         /* Extract olflags to translate to iltypes */
1903                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1904                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1905
1906                         /*
1907                          * E(47):L3_LEN(9):L2_LEN(7+z)
1908                          * E(47):L3_LEN(9):L2_LEN(7+z)
1909                          */
1910                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1911                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1912
1913                         /* Move OLFLAGS bits 55:52 to 51:48
1914                          * with zeros prepended on the byte; the
1915                          * rest are don't care.
1916                          */
1917                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1918                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1919                         /*
1920                          * E(48):L3_LEN(8):L2_LEN(z+7)
1921                          * E(48):L3_LEN(8):L2_LEN(z+7)
1922                          */
1923                         const int8x16_t tshft3 = {
1924                                 -1, 0, 8, 8, 8, 8, 8, 8,
1925                                 -1, 0, 8, 8, 8, 8, 8, 8,
1926                         };
1927
1928                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1929                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1930
1931                         /* Do the lookup */
1932                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1933                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1934
1935                         /* Pick only the relevant fields, i.e. bits 48:55 of
1936                          * iltype, and place them in ol3/ol4type of senddesc_w1
1937                          */
1938                         const uint8x16_t shuf_mask0 = {
1939                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1940                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1941                         };
1942
1943                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1944                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1945
1946                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1947                          * a [E(32):E(16):OL3(8):OL2(8)]
1948                          * a = a + (a << 8)
1949                          * a [E(32):E(16):(OL3+OL2):OL2]
1950                          * => E(32):E(16):OL4PTR(8):OL3PTR(8)
1951                          */
1952                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1953                                                  vshlq_n_u16(senddesc01_w1, 8));
1954                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1955                                                  vshlq_n_u16(senddesc23_w1, 8));
1956
1957                         /* Move ltypes to senddesc*_w1 */
1958                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1959                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1960                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1961                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1962                         /*
1963                          * Lookup table to translate ol_flags to
1964                          * ol3/ol4 types.
1965                          */
1966
1967                         const uint8x16_t tbl = {
1968                                 /* [0-15] = ol4type:ol3type */
1969                                 0x00, /* none */
1970                                 0x03, /* OUTER_IP_CKSUM */
1971                                 0x02, /* OUTER_IPV4 */
1972                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1973                                 0x04, /* OUTER_IPV6 */
1974                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1975                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1976                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1977                                        * OUTER_IP_CKSUM
1978                                        */
1979                                 0x00, /* OUTER_UDP_CKSUM */
1980                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1981                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1982                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1983                                        * OUTER_IP_CKSUM
1984                                        */
1985                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1986                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1987                                        * OUTER_IP_CKSUM
1988                                        */
1989                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1990                                        * OUTER_IPV4
1991                                        */
1992                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1993                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1994                                        */
1995                         };
1996
1997                         /* Extract olflags to translate to iltypes */
1998                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1999                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2000
2001                         /*
2002                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
2003                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
2004                          */
2005                         const uint8x16_t shuf_mask5 = {
2006                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2007                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2008                         };
2009                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2010                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2011
2012                         /* Extract outer ol flags only */
2013                         const uint64x2_t o_cksum_mask = {
2014                                 0x1C00020000000000,
2015                                 0x1C00020000000000,
2016                         };
2017
2018                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
2019                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
2020
2021                         /* Extract OUTER_UDP_CKSUM bit 41 and
2022                          * move it to bit 61
2023                          */
2024
2025                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2026                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2027
2028                         /* Shift oltype by 2 to start nibble from BIT(56)
2029                          * instead of BIT(58)
2030                          */
2031                         xtmp128 = vshrq_n_u8(xtmp128, 2);
2032                         ytmp128 = vshrq_n_u8(ytmp128, 2);
2033                         /*
2034                          * E(48):L3_LEN(8):L2_LEN(z+7)
2035                          * E(48):L3_LEN(8):L2_LEN(z+7)
2036                          */
2037                         const int8x16_t tshft3 = {
2038                                 -1, 0, 8, 8, 8, 8, 8, 8,
2039                                 -1, 0, 8, 8, 8, 8, 8, 8,
2040                         };
2041
2042                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2043                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2044
2045                         /* Do the lookup */
2046                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2047                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2048
2049                         /* Pick only the relevant fields, i.e. bits 56:63 of
2050                          * oltype, and place them in ol3/ol4type of senddesc_w1
2051                          */
2052                         const uint8x16_t shuf_mask0 = {
2053                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2054                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2055                         };
2056
2057                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2058                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2059
2060                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
2061                          * a [E(32):E(16):OL3(8):OL2(8)]
2062                          * a = a + (a << 8)
2063                          * a [E(32):E(16):(OL3+OL2):OL2]
2064                          * => E(32):E(16):OL4PTR(8):OL3PTR(8)
2065                          */
2066                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2067                                                  vshlq_n_u16(senddesc01_w1, 8));
2068                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2069                                                  vshlq_n_u16(senddesc23_w1, 8));
2070
2071                         /* Move ltypes to senddesc*_w1 */
2072                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2073                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2074                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2075                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2076                         /* Lookup table to translate ol_flags to
2077                          * ol4type, ol3type, il4type, il3type of senddesc_w1
2078                          */
2079                         const uint8x16x2_t tbl = {{
2080                                 {
2081                                         /* [0-15] = il4type:il3type */
2082                                         0x04, /* none (IPv6) */
2083                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
2084                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
2085                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
2086                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
2087                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
2088                                                * RTE_MBUF_F_TX_TCP_CKSUM
2089                                                */
2090                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2091                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2092                                                */
2093                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2094                                                * RTE_MBUF_F_TX_UDP_CKSUM
2095                                                */
2096                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
2097                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
2098                                                * RTE_MBUF_F_TX_TCP_CKSUM
2099                                                */
2100                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
2101                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2102                                                */
2103                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
2104                                                * RTE_MBUF_F_TX_UDP_CKSUM
2105                                                */
2106                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
2107                                                * RTE_MBUF_F_TX_IP_CKSUM
2108                                                */
2109                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2110                                                * RTE_MBUF_F_TX_TCP_CKSUM
2111                                                */
2112                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2113                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2114                                                */
2115                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2116                                                * RTE_MBUF_F_TX_UDP_CKSUM
2117                                                */
2118                                 },
2119
2120                                 {
2121                                         /* [16-31] = ol4type:ol3type */
2122                                         0x00, /* none */
2123                                         0x03, /* OUTER_IP_CKSUM */
2124                                         0x02, /* OUTER_IPV4 */
2125                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2126                                         0x04, /* OUTER_IPV6 */
2127                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2128                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2129                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2130                                                * OUTER_IP_CKSUM
2131                                                */
2132                                         0x00, /* OUTER_UDP_CKSUM */
2133                                         0x33, /* OUTER_UDP_CKSUM |
2134                                                * OUTER_IP_CKSUM
2135                                                */
2136                                         0x32, /* OUTER_UDP_CKSUM |
2137                                                * OUTER_IPV4
2138                                                */
2139                                         0x33, /* OUTER_UDP_CKSUM |
2140                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2141                                                */
2142                                         0x34, /* OUTER_UDP_CKSUM |
2143                                                * OUTER_IPV6
2144                                                */
2145                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2146                                                * OUTER_IP_CKSUM
2147                                                */
2148                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2149                                                * OUTER_IPV4
2150                                                */
2151                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2152                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2153                                                */
2154                                 },
2155                         }};
2156
2157                         /* Extract olflags to translate to oltype & iltype */
2158                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2159                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2160
2161                         /*
2162                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2163                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2164                          */
2165                         const uint32x4_t tshft_4 = {
2166                                 1,
2167                                 0,
2168                                 1,
2169                                 0,
2170                         };
2171                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2172                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2173
2174                         /*
2175                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2176                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2177                          */
2178                         const uint8x16_t shuf_mask5 = {
2179                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2180                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2181                         };
2182                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2183                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2184
2185                         /* Extract outer and inner header ol_flags */
2186                         const uint64x2_t oi_cksum_mask = {
2187                                 0x1CF0020000000000,
2188                                 0x1CF0020000000000,
2189                         };
2190
2191                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2192                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2193
2194                         /* Extract OUTER_UDP_CKSUM bit 41 and
2195                          * move it to bit 61
2196                          */
2197
2198                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2199                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2200
2201                         /* Shift right oltype by 2 and iltype by 4
2202                          * to start oltype nibble from BIT(56)
2203                          * instead of BIT(58) and iltype nibble from BIT(48)
2204                          * instead of BIT(52).
2205                          */
2206                         const int8x16_t tshft5 = {
2207                                 8, 8, 8, 8, 8, 8, -4, -2,
2208                                 8, 8, 8, 8, 8, 8, -4, -2,
2209                         };
2210
2211                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2212                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2213                         /*
2214                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2215                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2216                          */
2217                         const int8x16_t tshft3 = {
2218                                 -1, 0, -1, 0, 0, 0, 0, 0,
2219                                 -1, 0, -1, 0, 0, 0, 0, 0,
2220                         };
2221
2222                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2223                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2224
2225                         /* Mark bit 4 of the oltype index so the lookup selects the ol table */
2226                         const uint64x2_t oi_cksum_mask2 = {
2227                                 0x1000000000000000,
2228                                 0x1000000000000000,
2229                         };
2230
2231                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2232                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2233
2234                         /* Do the lookup */
2235                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2236                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2237
2238                         /* Pick only the relevant fields, i.e. bits 48:55 of
2239                          * iltype and bits 56:63 of oltype, and place them in
2240                          * the corresponding places in senddesc_w1.
2241                          */
2242                         const uint8x16_t shuf_mask0 = {
2243                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2244                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2245                         };
2246
2247                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2248                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2249
2250                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2251                          * l3len, l2len, ol3len, ol2len.
2252                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2253                          * a = a + (a << 8)
2254                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2255                          * a = a + (a << 16)
2256                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2257                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2258                          */
2259                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2260                                                  vshlq_n_u32(senddesc01_w1, 8));
2261                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2262                                                  vshlq_n_u32(senddesc23_w1, 8));
2263
2264                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2265                         senddesc01_w1 = vaddq_u8(
2266                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2267                         senddesc23_w1 = vaddq_u8(
2268                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2269
2270                         /* Move ltypes to senddesc*_w1 */
2271                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2272                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2273                 }
2274
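                     /* Load the low 16 bits of each mbuf's pool_id (the NPA
                      * aura handle) and shift them to bit 20, the aura field
                      * of SEND HDR W0, so the NIX frees each buffer back to
                      * its mempool after transmit.
                      */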
2275                 xmask01 = vdupq_n_u64(0);
2276                 xmask23 = xmask01;
2277                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2278                              : [a] "+w"(xmask01)
2279                              : [in] "r"(mbuf0)
2280                              : "memory");
2281
2282                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2283                              : [a] "+w"(xmask01)
2284                              : [in] "r"(mbuf1)
2285                              : "memory");
2286
2287                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2288                              : [b] "+w"(xmask23)
2289                              : [in] "r"(mbuf2)
2290                              : "memory");
2291
2292                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2293                              : [b] "+w"(xmask23)
2294                              : [in] "r"(mbuf3)
2295                              : "memory");
2296                 xmask01 = vshlq_n_u64(xmask01, 20);
2297                 xmask23 = vshlq_n_u64(xmask23, 20);
2298
2299                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2300                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2301
2302                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2303                         /* Tx ol_flag for vlan. */
2304                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2305                         /* Bit enable for VLAN1 */
2306                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2307                         /* Tx ol_flag for QinQ. */
2308                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2309                         /* Bit enable for VLAN0 */
2310                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2311                         /* Load vlan values from packet; outer is VLAN0 */
2312                         uint64x2_t ext01 = {
2313                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2314                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2315                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2316                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2317                         };
2318                         uint64x2_t ext23 = {
2319                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2320                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2321                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2322                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2323                         };
2324
2325                         /* Get ol_flags of the packets. */
2326                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2327                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2328
2329                         /* ORR vlan outer/inner values into cmd. */
2330                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2331                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2332
2333                         /* Test for offload enable bits and generate masks. */
2334                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2335                                                       mlv),
2336                                             vandq_u64(vtstq_u64(xtmp128, olq),
2337                                                       mlq));
2338                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2339                                                       mlv),
2340                                             vandq_u64(vtstq_u64(ytmp128, olq),
2341                                                       mlq));
2342
2343                         /* Set vlan enable bits into cmd based on mask. */
2344                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2345                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2346                 }
2347
2348                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2349                         /* Tx ol_flag for timestamp. */
2350                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2351                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
2352                         /* Set send mem alg to SUB. */
2353                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2354                         /* Increment send mem address by 8. */
2355                         const uint64x2_t addr = {0x8, 0x8};
2356
2357                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2358                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2359
2360                         /* Check if timestamp is requested and generate an
2361                          * inverted mask, as we need not make any changes to
2362                          * the default cmd value.
2363                          */
2364                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2365                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2366
2367                         /* Change send mem address to an 8 byte offset when
2368                          * TSTMP is disabled.
2369                          */
2370                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2371                                                  vandq_u64(xtmp128, addr));
2372                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2373                                                  vandq_u64(ytmp128, addr));
2374                         /* Change send mem alg to SUB when TSTMP is disabled. */
2375                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2376                                                  vandq_u64(xtmp128, alg));
2377                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2378                                                  vandq_u64(ytmp128, alg));
2379
2380                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2381                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2382                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2383                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2384                 }
2385
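                /* TSO fix-up is not SIMD friendly, so the relevant lanes
                 * are spilled to scratch arrays, patched per packet with
                 * cn10k_nix_prepare_tso() and reloaded afterwards, e.g.
                 * for lane 0:
                 *
                 *	vst1q_u64(sd_w1, senddesc01_w1);
                 *	cn10k_nix_prepare_tso(pkt, ..., flags, lso_fmt);
                 *	senddesc01_w1 = vld1q_u64(sd_w1);
                 */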
2386                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2387                         const uint64_t lso_fmt = txq->lso_tun_fmt;
2388                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2389                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2390
2391                         /* Extract SD W1 as we need to set L4 types. */
2392                         vst1q_u64(sd_w1, senddesc01_w1);
2393                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
2394
2395                         /* Extract SX W0 as we need to set LSO fields. */
2396                         vst1q_u64(sx_w0, sendext01_w0);
2397                         vst1q_u64(sx_w0 + 2, sendext23_w0);
2398
2399                         /* Extract ol_flags. */
2400                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2401                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2402
2403                         /* Prepare individual mbufs. */
2404                         cn10k_nix_prepare_tso(tx_pkts[0],
2405                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
2406                                 (union nix_send_ext_w0_u *)&sx_w0[0],
2407                                 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2408
2409                         cn10k_nix_prepare_tso(tx_pkts[1],
2410                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
2411                                 (union nix_send_ext_w0_u *)&sx_w0[1],
2412                                 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2413
2414                         cn10k_nix_prepare_tso(tx_pkts[2],
2415                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
2416                                 (union nix_send_ext_w0_u *)&sx_w0[2],
2417                                 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2418
2419                         cn10k_nix_prepare_tso(tx_pkts[3],
2420                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
2421                                 (union nix_send_ext_w0_u *)&sx_w0[3],
2422                                 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2423
2424                         senddesc01_w1 = vld1q_u64(sd_w1);
2425                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2426
2427                         sendext01_w0 = vld1q_u64(sx_w0);
2428                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
2429                 }
2430
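                /* Fast-free handling: cnxk_nix_prefree_seg() returns
                 * nonzero when the mbuf cannot be returned to its pool by
                 * the NIX (e.g. refcnt > 1), in which case the don't-free
                 * bit in SEND_HDR W0 (0x80000 here) is set so software
                 * keeps ownership; otherwise the mempool cookie is marked
                 * "put" as HW will free the buffer. Scalar form of the
                 * same logic:
                 *
                 *	if (cnxk_nix_prefree_seg(m))
                 *		senddesc_w0 |= 0x80000; // don't free
                 *	else
                 *		RTE_MEMPOOL_CHECK_COOKIES(m->pool,
                 *					  (void **)&m, 1, 0);
                 */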
2431                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2432                     !(flags & NIX_TX_MULTI_SEG_F) &&
2433                     !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2434                         /* Set the don't-free bit when refcnt > 1 */
2435                         xmask01 = vdupq_n_u64(0);
2436                         xmask23 = xmask01;
2437
2438                         /* Fetch mbuf pointers for the prefree checks */
2439                         mbuf0 = (uint64_t *)tx_pkts[0];
2440                         mbuf1 = (uint64_t *)tx_pkts[1];
2441                         mbuf2 = (uint64_t *)tx_pkts[2];
2442                         mbuf3 = (uint64_t *)tx_pkts[3];
2443
2444                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2445                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2446                         else
2447                                 RTE_MEMPOOL_CHECK_COOKIES(
2448                                         ((struct rte_mbuf *)mbuf0)->pool,
2449                                         (void **)&mbuf0, 1, 0);
2450
2451                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2452                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2453                         else
2454                                 RTE_MEMPOOL_CHECK_COOKIES(
2455                                         ((struct rte_mbuf *)mbuf1)->pool,
2456                                         (void **)&mbuf1, 1, 0);
2457
2458                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2459                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2460                         else
2461                                 RTE_MEMPOOL_CHECK_COOKIES(
2462                                         ((struct rte_mbuf *)mbuf2)->pool,
2463                                         (void **)&mbuf2, 1, 0);
2464
2465                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2466                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2467                         else
2468                                 RTE_MEMPOOL_CHECK_COOKIES(
2469                                         ((struct rte_mbuf *)mbuf3)->pool,
2470                                         (void **)&mbuf3, 1, 0);
2471                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2472                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2473                 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2474                            !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2475                         /* Fetch mbuf pointers for cookie marking */
2476                         mbuf0 = (uint64_t *)tx_pkts[0];
2477                         mbuf1 = (uint64_t *)tx_pkts[1];
2478                         mbuf2 = (uint64_t *)tx_pkts[2];
2479                         mbuf3 = (uint64_t *)tx_pkts[3];
2480
2481                         /* Mark mempool object as "put" since
2482                          * it is freed by NIX
2483                          */
2484                         RTE_MEMPOOL_CHECK_COOKIES(
2485                                 ((struct rte_mbuf *)mbuf0)->pool,
2486                                 (void **)&mbuf0, 1, 0);
2487
2488                         RTE_MEMPOOL_CHECK_COOKIES(
2489                                 ((struct rte_mbuf *)mbuf1)->pool,
2490                                 (void **)&mbuf1, 1, 0);
2491
2492                         RTE_MEMPOOL_CHECK_COOKIES(
2493                                 ((struct rte_mbuf *)mbuf2)->pool,
2494                                 (void **)&mbuf2, 1, 0);
2495
2496                         RTE_MEMPOOL_CHECK_COOKIES(
2497                                 ((struct rte_mbuf *)mbuf3)->pool,
2498                                 (void **)&mbuf3, 1, 0);
2499                 }
2500
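                /* vzip1q/vzip2q interleave the per-pair W0/W1 vectors back
                 * into per-packet 16B command words; with w0 = {A0, B0}
                 * and w1 = {A1, B1}:
                 *
                 *	vzip1q_u64(w0, w1) -> {A0, A1}  packet 0
                 *	vzip2q_u64(w0, w1) -> {B0, B1}  packet 1
                 */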
2501                 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
2502                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2503                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2504                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2505                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2506
2507                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2508                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2509                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2510                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2511
2512                 if (flags & NIX_TX_NEED_EXT_HDR) {
2513                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2514                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2515                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2516                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2517                 }
2518
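                /* Security path: packets flagged RTE_MBUF_F_TX_SEC_OFFLOAD
                 * are steered to the CPT side (cn10k_nix_prep_sec_vec()
                 * builds the CPT instruction and reserves a slot in the
                 * CPT LMT region), while the rest take a regular NIX LMT
                 * slot via cn10k_nix_lmt_next(); cn10k_nix_xmit_store()
                 * then writes the prepared words to whichever destination
                 * `next` points at.
                 */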
2519                 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2520                         const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2521                                                 RTE_MBUF_F_TX_SEC_OFFLOAD};
2522                         uintptr_t next;
2523                         uint8_t dw;
2524
2525                         /* Extract ol_flags. */
2526                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2527                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2528
2529                         xtmp128 = vtstq_u64(olf, xtmp128);
2530                         ytmp128 = vtstq_u64(olf, ytmp128);
2531
2532                         /* Process mbuf0 */
2533                         dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2534                         if (vgetq_lane_u64(xtmp128, 0))
2535                                 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2536                                                        &cmd1[0], &next, c_laddr,
2537                                                        &c_lnum, &c_loff,
2538                                                        &c_shft, sa_base, flags);
2539                         else
2540                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2541                                                    &shift, &wd.data128, &next);
2542
2543                         /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2544                         cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2545                                              cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2546                                              flags);
2547
2548                         /* Process mbuf1 */
2549                         dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2550                         if (vgetq_lane_u64(xtmp128, 1))
2551                                 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2552                                                        &cmd1[1], &next, c_laddr,
2553                                                        &c_lnum, &c_loff,
2554                                                        &c_shft, sa_base, flags);
2555                         else
2556                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2557                                                    &shift, &wd.data128, &next);
2558
2559                         /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2560                         cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2561                                              cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2562                                              flags);
2563
2564                         /* Process mbuf2 */
2565                         dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2566                         if (vgetq_lane_u64(ytmp128, 0))
2567                                 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2568                                                        &cmd1[2], &next, c_laddr,
2569                                                        &c_lnum, &c_loff,
2570                                                        &c_shft, sa_base, flags);
2571                         else
2572                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2573                                                    &shift, &wd.data128, &next);
2574
2575                         /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2576                         cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2577                                              cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2578                                              flags);
2579
2580                         /* Process mbuf3 */
2581                         dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2582                         if (vgetq_lane_u64(ytmp128, 1))
2583                                 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2584                                                        &cmd1[3], &next, c_laddr,
2585                                                        &c_lnum, &c_loff,
2586                                                        &c_shft, sa_base, flags);
2587                         else
2588                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2589                                                    &shift, &wd.data128, &next);
2590
2591                         /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2592                         cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2593                                              cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2594                                              flags);
2595
2596                 } else if (flags & NIX_TX_MULTI_SEG_F) {
2597                         uint8_t j;
2598
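                        /* segdw[4] appears to act as a sentinel: the pair
                         * packing in cn10k_nix_prep_lmt_mseg_vector() looks
                         * one descriptor ahead, and a dword count of 8
                         * ensures no pairing is attempted past the last
                         * packet (an assumption drawn from the packing
                         * logic, not documented here).
                         */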
2599                         segdw[4] = 8;
2600                         j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2601                                                           cmd2, cmd3, segdw,
2602                                                           (uint64_t *)
2603                                                           LMT_OFF(laddr, lnum,
2604                                                                   0),
2605                                                           &wd.data128, &shift,
2606                                                           flags);
2607                         lnum += j;
2608                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2609                         /* Store the prepared send desc to LMT lines */
2610                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2611                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2612                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2613                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2614                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2615                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2616                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2617                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2618                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2619                                 lnum += 1;
2620                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2621                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2622                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2623                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2624                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2625                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2626                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2627                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2628                         } else {
2629                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2630                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2631                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2632                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2633                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2634                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2635                                 lnum += 1;
2636                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2637                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2638                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2639                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2640                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2641                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2642                         }
2643                         lnum += 1;
2644                 } else {
2645                         /* Store the prepared send desc to LMT lines */
2646                         vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2647                         vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2648                         vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2649                         vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2650                         vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2651                         vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2652                         vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2653                         vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2654                         lnum += 1;
2655                 }
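                /* In the stores above, per-packet words follow the NIX
                 * sub-descriptor order: SEND_HDR (cmd0), SEND_EXT (cmd2)
                 * when an extension header is present, SG (cmd1), then
                 * SEND_MEM (cmd3) for timestamping; hence the
                 * cmd0/cmd2/cmd1/cmd3 store order rather than the numeric
                 * one.
                 */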
2656
2657                 if (flags & NIX_TX_MULTI_SEG_F) {
2658                         tx_pkts[0]->next = NULL;
2659                         tx_pkts[1]->next = NULL;
2660                         tx_pkts[2]->next = NULL;
2661                         tx_pkts[3]->next = NULL;
2662                 }
2663
2664                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2665         }
2666
2667         /* Round up lnum to cover a partially filled last LMT line */
2668         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2669                 lnum = lnum + !!loff;
2670                 wd.data128 = wd.data128 |
2671                         (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2672         }
2673
2674         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2675                 wd.data[0] >>= 16;
2676
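        /* For VWQE, bit 35 of ws[1] appears to cache whether this HWS is
         * already head of its ordered context; if not, wait for head
         * status so Tx submission order matches scheduling order.
         */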
2677         if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
2678                 ws[1] = roc_sso_hws_head_wait(ws[0]);
2679
2680         left -= burst;
2681
2682         /* Submit CPT instructions if any */
2683         if (flags & NIX_TX_OFFLOAD_SECURITY_F)
2684                 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2685                                      c_shft);
2686
2687         /* Trigger LMTST */
2688         if (lnum > 16) {
2689                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2690                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2691
2692                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2693                 wd.data[0] &= ~0x7ULL;
2694
2695                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2696                         wd.data[0] <<= 16;
2697
2698                 wd.data[0] |= (15ULL << 12);
2699                 wd.data[0] |= (uint64_t)lmt_id;
2700
2701                 /* STEOR0 */
2702                 roc_lmt_submit_steorl(wd.data[0], pa);
2703
2704                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2705                         wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2706
2707                 pa = io_addr | (wd.data[1] & 0x7) << 4;
2708                 wd.data[1] &= ~0x7ULL;
2709
2710                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2711                         wd.data[1] <<= 16;
2712
2713                 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2714                 wd.data[1] |= (uint64_t)(lmt_id + 16);
2715
2716                 /* STEOR1 */
2717                 roc_lmt_submit_steorl(wd.data[1], pa);
2718         } else if (lnum) {
2719                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2720                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2721
2722                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2723                 wd.data[0] &= ~0x7ULL;
2724
2725                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2726                         wd.data[0] <<= 16;
2727
2728                 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2729                 wd.data[0] |= lmt_id;
2730
2731                 /* STEOR0 */
2732                 roc_lmt_submit_steorl(wd.data[0], pa);
2733         }
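        /* A single steorl op can flush at most 16 LMT lines, hence the
         * STEOR0/STEOR1 split above; the line count minus one is placed
         * at bit 12 of the data word. E.g. with lnum = 20:
         *
         *	STEOR0: lines 0..15 at lmt_id       -> 15ULL << 12
         *	STEOR1: lines 16..19 at lmt_id + 16 -> (20 - 17) << 12
         */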
2734
2735         rte_io_wmb();
2736         if (left)
2737                 goto again;
2738
2739         if (unlikely(scalar)) {
2740                 if (flags & NIX_TX_MULTI_SEG_F)
2741                         pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
2742                                                          scalar, cmd, flags);
2743                 else
2744                         pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
2745                                                     scalar, cmd, flags);
2746         }
2747
2748         return pkts;
2749 }
2750
2751 #else
2752 static __rte_always_inline uint16_t
2753 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
2754                            struct rte_mbuf **tx_pkts, uint16_t pkts,
2755                            uint64_t *cmd, const uint16_t flags)
2756 {
2757         RTE_SET_USED(ws);
2758         RTE_SET_USED(tx_queue);
2759         RTE_SET_USED(tx_pkts);
2760         RTE_SET_USED(pkts);
2761         RTE_SET_USED(cmd);
2762         RTE_SET_USED(flags);
2763         return 0;
2764 }
2765 #endif
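/*
 * Usage sketch (illustrative only): the vector path is normally reached
 * through the generated fast-path functions below rather than called
 * directly, but a direct call would look roughly like:
 *
 *	uint64_t cmd[8];
 *	uint16_t sent;
 *
 *	sent = cn10k_nix_xmit_pkts_vector(txq, NULL, pkts, nb_pkts, cmd,
 *					  NIX_TX_OFFLOAD_L3_L4_CSUM_F);
 *
 * where txq/pkts are assumed to be a valid cn10k Tx queue and mbuf
 * array; ws is only needed on the NIX_TX_VWQE_F event path.
 */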
2766
2767 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
2768 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2769 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
2770 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
2771 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
2772 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
2773 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
2774
2775 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
2776 #define NIX_TX_FASTPATH_MODES_0_15                                             \
2777         T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
2778         T(l3l4csum, 6, L3L4CSUM_F)                                             \
2779         T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
2780         T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
2781         T(vlan, 6, VLAN_F)                                                     \
2782         T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
2783         T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
2784         T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2785         T(noff, 6, NOFF_F)                                                     \
2786         T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
2787         T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
2788         T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2789         T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
2790         T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
2791         T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
2792         T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
2793           NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
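/*
 * Each T(name, sz, flags) row names one specialization: "name" encodes
 * the offload combination, "sz" is the number of 64-bit cmd words the
 * path needs (8 once the timestamp SEND_MEM word is in play, else 6),
 * and "flags" is the compile-time offload mask. The eight tables cover
 * all 128 combinations of the seven offload bits listed above.
 */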
2794
2795 #define NIX_TX_FASTPATH_MODES_16_31                                            \
2796         T(tso, 6, TSO_F)                                                       \
2797         T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
2798         T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
2799         T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
2800         T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
2801         T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
2802         T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
2803         T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2804           TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2805         T(tso_noff, 6, TSO_F | NOFF_F)                                         \
2806         T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
2807         T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
2808         T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
2809           TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2810         T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
2811         T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
2812         T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2813         T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2814           TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2815
2816 #define NIX_TX_FASTPATH_MODES_32_47                                            \
2817         T(ts, 8, TSP_F)                                                        \
2818         T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
2819         T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
2820         T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2821         T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
2822         T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
2823         T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
2824         T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
2825           TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2826         T(ts_noff, 8, TSP_F | NOFF_F)                                          \
2827         T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
2828         T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
2829         T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
2830           TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2831         T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
2832         T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
2833         T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
2834         T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
2835           TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2836
2837 #define NIX_TX_FASTPATH_MODES_48_63                                            \
2838         T(ts_tso, 8, TSP_F | TSO_F)                                            \
2839         T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
2840         T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
2841         T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
2842           TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
2843         T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
2844         T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
2845         T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
2846         T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2847           TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2848         T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
2849         T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
2850         T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
2851         T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
2852           TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2853         T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
2854         T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
2855           TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
2856         T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
2857           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
2858         T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2859           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2860
2861 #define NIX_TX_FASTPATH_MODES_64_79                                            \
2862         T(sec, 6, T_SEC_F)                                                     \
2863         T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
2864         T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
2865         T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2866         T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
2867         T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
2868         T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
2869         T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2870           T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2871         T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
2872         T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
2873         T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
2874         T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
2875           T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2876         T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
2877         T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
2878         T(sec_noff_vlan_ol3ol4csum, 6,                                         \
2879           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
2880         T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2881           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2882
2883 #define NIX_TX_FASTPATH_MODES_80_95                                            \
2884         T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
2885         T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
2886         T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
2887         T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
2888           T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2889         T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
2890         T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
2891         T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2892         T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
2893           T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2894         T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
2895         T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
2896         T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2897         T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
2898           T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2899         T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
2900         T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
2901           T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2902         T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
2903           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2904         T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
2905           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2906
2907 #define NIX_TX_FASTPATH_MODES_96_111                                           \
2908         T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2909         T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2910         T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2911         T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2912           T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2913         T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2914         T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2915         T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2916         T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2917           T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2918         T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2919         T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2920         T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2921         T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2922           T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2923         T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2924         T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2925           T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2926         T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2927           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2928         T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2929           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2930
2931 #define NIX_TX_FASTPATH_MODES_112_127                                          \
2932         T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2933         T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2934         T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2935         T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2936           T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2937         T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2938         T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2939           T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2940         T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2941           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2942         T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2943           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2944         T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2945         T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2946           T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2947         T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2948           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2949         T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2950           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2951         T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2952         T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2953           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2954         T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2955           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2956         T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2957           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2958                   L3L4CSUM_F)
2959
2960 #define NIX_TX_FASTPATH_MODES                                                  \
2961         NIX_TX_FASTPATH_MODES_0_15                                             \
2962         NIX_TX_FASTPATH_MODES_16_31                                            \
2963         NIX_TX_FASTPATH_MODES_32_47                                            \
2964         NIX_TX_FASTPATH_MODES_48_63                                            \
2965         NIX_TX_FASTPATH_MODES_64_79                                            \
2966         NIX_TX_FASTPATH_MODES_80_95                                            \
2967         NIX_TX_FASTPATH_MODES_96_111                                           \
2968         NIX_TX_FASTPATH_MODES_112_127
2969
2970 #define T(name, sz, flags)                                                     \
2971         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
2972                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2973         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
2974                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2975         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
2976                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2977         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
2978                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2979
2980 NIX_TX_FASTPATH_MODES
2981 #undef T
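/*
 * The expansion above declares four prototypes per mode; e.g. the
 * no_offload row yields roughly:
 *
 *	uint16_t cn10k_nix_xmit_pkts_no_offload(void *tx_queue,
 *						struct rte_mbuf **tx_pkts,
 *						uint16_t pkts);
 *
 * plus the _mseg, _vec and _vec_mseg variants; definitions come from
 * the NIX_TX_XMIT* macros below, typically in per-mode source files.
 */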
2982
2983 #define NIX_TX_XMIT(fn, sz, flags)                                             \
2984         uint16_t __rte_noinline __rte_hot fn(                                  \
2985                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2986         {                                                                      \
2987                 uint64_t cmd[sz];                                              \
2988                 /* For TSO inner checksum is a must */                         \
2989                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2990                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2991                         return 0;                                              \
2992                 return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
2993                                            flags);                             \
2994         }
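/*
 * The early return above encodes a hardware constraint: segments
 * produced by TSO need their inner L3/L4 checksums recomputed, so
 * requesting TSO without NIX_TX_OFFLOAD_L3_L4_CSUM_F is rejected and
 * no packets are accepted.
 */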
2995
2996 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
2997         uint16_t __rte_noinline __rte_hot fn(                                  \
2998                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2999         {                                                                      \
3000                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
3001                 /* For TSO inner checksum is a must */                         \
3002                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3003                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3004                         return 0;                                              \
3005                 return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
3006                                                 cmd,                           \
3007                                                 flags | NIX_TX_MULTI_SEG_F);   \
3008         }
3009
3010 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
3011         uint16_t __rte_noinline __rte_hot fn(                                  \
3012                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3013         {                                                                      \
3014                 uint64_t cmd[sz];                                              \
3015                 /* For TSO inner checksum is a must */                         \
3016                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3017                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3018                         return 0;                                              \
3019                 return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
3020                                                   pkts, cmd, (flags));         \
3021         }
3022
3023 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
3024         uint16_t __rte_noinline __rte_hot fn(                                  \
3025                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3026         {                                                                      \
3027                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
3028                 /* For TSO inner checksum is a must */                         \
3029                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3030                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3031                         return 0;                                              \
3032                 return cn10k_nix_xmit_pkts_vector(                             \
3033                         tx_queue, NULL, tx_pkts, pkts, cmd,                    \
3034                         (flags) | NIX_TX_MULTI_SEG_F);                         \
3035         }
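/*
 * Instantiation sketch (illustrative): a source file can emit one
 * definition per mode by pairing a mode table with one of the macros
 * above, e.g.:
 *
 *	#define T(name, sz, flags)                                         \
 *		NIX_TX_XMIT(cn10k_nix_xmit_pkts_##name, sz, flags)
 *	NIX_TX_FASTPATH_MODES_0_15
 *	#undef T
 */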
3036
3037 #endif /* __CN10K_TX_H__ */