/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flags to control the xmit_prepare function. These are defined from the
 * MSB end to denote that they are not offload flags used to pick the Tx
 * function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update fc_cache_pkts */                \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply by sqes_per_sqb to express in pkts */      \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check again for room */                             \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
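
/* Note on the flow-control math above: (nb_sqb_bufs_adj - *fc_mem) is the
 * number of free SQBs and each SQB holds 2^sqes_per_sqb_log2 SQEs, so the
 * refreshed fc_cache_pkts is expressed in packet (SQE) units.
 */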

/* Encoded number of segments to number of dwords macro; each nb_segs
 * value is encoded as 4 bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
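
/* Decoding the magic: nibble n of NIX_SEGDW_MAGIC is the size of the SG
 * area for an n-segment packet in 16B (2-dword) units, i.e. ceil(n / 3)
 * SG subdescriptor headers plus n pointer dwords, rounded up to an even
 * dword count and halved. E.g. n = 5 -> (2 + 5 + 1) / 2 = 4.
 */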

/* Function to determine the number of Tx sub-descriptors required when the
 * extension sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		return 2;
	if (flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
		return 1;
	return 0;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
	if (!(flags & NIX_TX_MULTI_SEG_F))
		return cn10k_nix_tx_ext_subs(flags) + 2;

	/* Already everything is accounted for in segdw */
	return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
	       << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
	return (flags & NIX_TX_NEED_EXT_HDR) ?
		       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : 8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	uint64_t data;

	/* This will be moved to the addr area */
	data = dw_m1;
	/* Size - 1 for the remaining 15 LMT lines, single seg */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
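
/* Layout of the STEOR data word built above (see its use in
 * cn10k_nix_xmit_pkts): the low bits carry the LMT ID, bits [15:12] the
 * number of LMT lines minus one, and 3-bit "size - 1" fields for lines
 * 1..15 start at bit 19. The "size - 1" of line 0 is folded into bits
 * [6:4] of the I/O address instead.
 */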

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
	return ((flags & NIX_TX_NEED_EXT_HDR) ?
			((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : 4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
	uint64_t data;

	/* This will be moved to the addr area */
	data = dw_m1;
	/* Size - 1 for the remaining 15 LMT lines */
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
	/* We have two CPT instructions per LMTLine */
	const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
	uint64_t data;

	/* This will be moved to the addr area */
	data = dw_m1 << 16;
	data |= dw_m1 << 19;
	data |= dw_m1 << 22;
	data |= dw_m1 << 25;
	data |= dw_m1 << 28;
	data |= dw_m1 << 31;
	data |= dw_m1 << 34;
	data |= dw_m1 << 37;
	data |= dw_m1 << 40;
	data |= dw_m1 << 43;
	data |= dw_m1 << 46;
	data |= dw_m1 << 49;
	data |= dw_m1 << 52;
	data |= dw_m1 << 55;
	data |= dw_m1 << 58;
	data |= dw_m1 << 61;

	return data;
}
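
/* Unlike the NIX variants above, the line-0 size here starts at bit 16;
 * cn10k_nix_sec_steorl() extracts it into the I/O address and may rewrite
 * the last line's size field when that line is only half filled.
 */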

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags)
{
	/* Send hdr */
	cmd[0] = txq->send_hdr_w0;
	cmd[1] = 0;
	cmd += 2;

	/* Send ext if present */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
		cmd += 2;
	}

	/* Send sg */
	cmd[0] = txq->sg_w0;
	cmd[1] = 0;
}
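
/* Resulting cmd[] layout from the skeleton copy when the extension header
 * is needed: { SEND_HDR W0, 0, SEND_EXT W0/W1 copied from txq->cmd,
 * SG W0, 0 }; without it, the SG pair immediately follows the send header.
 */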

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
		     uint8_t loff, uint8_t shft)
{
	uint64_t data;
	uintptr_t pa;

	/* Check if there is any CPT instruction to submit */
	if (!lnum && !loff)
		return;

	data = cn10k_cpt_tx_steor_data();
	/* Update lmtline usage for a partial end line */
	if (loff) {
		data &= ~(0x7ULL << shft);
		/* Update it to half full, i.e., 64B */
		data |= (0x3UL << shft);
	}

	pa = io_addr | ((data >> 16) & 0x7) << 4;
	data &= ~(0x7ULL << 16);
	/* Update number of lines - 1 that contain valid data */
	data |= ((uint64_t)(lnum + loff - 1)) << 12;
	data |= lmt_id;

	/* STEOR */
	roc_lmt_submit_steorl(data, pa);
}

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
		       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
		       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
		       const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	uint64x2_t cmd01, cmd23;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint8_t l2_len;
	uint16_t tag;
	uint64_t sa;

	sess_priv.u64 = *rte_security_dynfield(m);

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		l2_len = vgetq_lane_u8(*cmd0, 8);
	else
		l2_len = m->l2_len;

	/* Retrieve DPTR */
	dptr = vgetq_lane_u64(*cmd1, 1);
	pkt_len = vgetq_lane_u16(*cmd0, 0);

	/* Calculate dlen adj */
	dlen_adj = pkt_len - l2_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptors. Security is single segment only */
	*cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
	*cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

	/* Get area where NIX descriptor needs to be stored */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed is excluding L2HDR */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] =
		(ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

	/* CPT Word 0 and Word 1 */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* CPT_RES_S is 16B above NIXTX */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3 */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}

	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Move to our line */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to lmt line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Move to next line for every other CPT inst */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	struct nix_send_hdr_s *send_hdr;
	uint64x2_t cmd01, cmd23;
	union nix_send_sg_s *sg;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint8_t l2_len;
	uint16_t tag;
	uint64_t sa;

	/* Move to our line from base */
	sess_priv.u64 = *rte_security_dynfield(m);
	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR)
		sg = (union nix_send_sg_s *)&cmd[4];
	else
		sg = (union nix_send_sg_s *)&cmd[2];

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		l2_len = cmd[1] & 0xFF;
	else
		l2_len = m->l2_len;

	/* Retrieve DPTR */
	dptr = *(uint64_t *)(sg + 1);
	pkt_len = send_hdr->w0.total;

	/* Calculate dlen adj */
	dlen_adj = pkt_len - l2_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptors. Security is single segment only */
	send_hdr->w0.total = pkt_len + dlen_adj;
	sg->seg1_size = pkt_len + dlen_adj;

	/* Get area where NIX descriptor needs to be stored */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed is excluding L2HDR */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] =
		(ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

	/* CPT Word 0 and Word 1. Assume no multi-seg support */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* CPT_RES_S is 16B above NIXTX */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3 */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}
	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Move to our line */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to lmt line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Move to next line for every other CPT inst */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	RTE_SET_USED(m);
	RTE_SET_USED(cmd);
	RTE_SET_USED(nixtx_addr);
	RTE_SET_USED(lbase);
	RTE_SET_USED(lnum);
	RTE_SET_USED(loff);
	RTE_SET_USED(shft);
	RTE_SET_USED(sa_base);
	RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       RTE_MBUF_F_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update outer UDP header length for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
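
/* A worked example of the fixup above: for a plain TCP packet with
 * l2/l3/l4 lengths 14/20/20, lso_sb = 54 and paylen = pkt_len - 54. The IP
 * total-length field (and the outer IP/UDP lengths for tunnels) is reduced
 * by paylen here; each segment's payload size is then expected to be added
 * back by the LSO format programmed in hardware (see lso_tun_fmt).
 */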

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
		       const uint64_t lso_tun_fmt, bool *sec)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F))
		send_hdr->w0.total = m->data_len;
	else
		send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment by 1 if IPv4, as type 3 is IPv4 with csum */
		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

		/* In case there is no tunnel header, shift the IL3/IL4
		 * fields down so that OL3/OL4 are used for the header
		 * checksum
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment by 1 if IPv4, as type 3 is IPv4 with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment by 1 if IPv4, as type 3 is IPv4 with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = send_hdr->w0.total;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 * mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	} else {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		/* NOFF is handled later for multi-seg */
	}

	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		*sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
			   const uint16_t flags)
{
	struct nix_send_ext_s *send_hdr_ext;
	union nix_send_sg_s *sg;

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be in stack. Intent is
	 * 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;

		sg = (union nix_send_sg_s *)(cmd + 4);
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
		/* Ext header lives 16B past the send header on the LMT line */
		struct nix_send_ext_s *send_hdr_ext =
			(struct nix_send_ext_s *)(lmt_addr + 16);
		uint64_t *lmt = (uint64_t *)lmt_addr;
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);
		send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
		send_hdr_ext->w0.tstmp = 1;
		if (flags & NIX_TX_MULTI_SEG_F) {
			/* Retrieving the default desc values */
			lmt[off] = cmd[2];

			/* Using compiler barrier to avoid violation of C
			 * aliasing rules.
			 */
			rte_compiler_barrier();
		}

		/* For packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not
		 * set, the Tx tstamp should not be recorded; hence change
		 * the alg type to NIX_SENDMEMALG_SET and move the send mem
		 * addr field to the next 8 bytes so the actual registered
		 * Tx tstamp address is not corrupted.
		 */
		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
	}
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];

	/* Start from second segment, first segment is already there */
	i = 1;
	sg_u = sg->u;
	nb_segs = m->nb_segs - 1;
	m_next = m->next;
	slist = &cmd[3 + off + 1];

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif
	m = m_next;
	if (!m)
		goto done;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

done:
	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Round up extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
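
/* Example of the segdw math above, with the extension header (off = 2): a
 * 4-segment packet lays out two SG subdescriptors plus four pointers over
 * 6 dwords, i.e. 3 16B units after rounding; adding one unit each for the
 * send and ext headers gives segdw = 5 and w0.sizem1 = 4.
 */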

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		    uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uint8_t lnum, c_lnum, c_shft, c_loff;
	uintptr_t pa, lbase = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uintptr_t c_lbase = lbase;
	rte_iova_t c_io_addr;
	uint64_t lso_tun_fmt;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	uint64_t data;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}

	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags);

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	burst = left > 32 ? 32 : left;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction and get nixtx addr */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX desc to LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, 4, flags);
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
			lnum++;
	}

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent to CPT */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

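	/* Note: a single STEOR covers at most 16 LMT lines, hence bursts of
	 * up to 32 packets are flushed as one STEOR of 16 lines plus a
	 * second for the remaining (burst - 16) lines at lmt_id + 16.
	 */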
	/* Trigger LMTST */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t pkts, uint64_t *cmd, uintptr_t base,
			 const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lbase = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint8_t lnum, c_lnum, c_loff;
	uintptr_t c_lbase = lbase;
	uint64_t data0, data1;
	rte_iova_t c_io_addr;
	uint64_t lso_tun_fmt;
	uint8_t shft, c_shft;
	__uint128_t data128;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	bool sec;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction and get nixtx addr */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX desc to LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);

		/* Store sg list directly on lmt line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
					      tx_pkts[i]->ol_flags, segdw,
					      flags);
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
			lnum++;
			data128 |= (((__uint128_t)(segdw - 1)) << shft);
			shft += 3;
		}
	}

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent to CPT */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;
	/* Trigger LMTST */
	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		      const uint64_t flags, const uint64_t lso_tun_fmt)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
			0x1;
		uint8_t shift = is_udp_tun ? 32 : 0;

		shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
		shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format = (lso_tun_fmt >> shift);
	}
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
				union nix_send_hdr_w0_u *sh,
				union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif

	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			   uint64x2_t *cmd1, const uint8_t segdw,
			   const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;

	if (m->nb_segs == 1) {
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		return;
	}

	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	sh.sizem1 = segdw - 1;
	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}

#define NIX_DESCS_PER_LOOP 4
1256
1257 static __rte_always_inline uint8_t
1258 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1259                                uint64x2_t *cmd1, uint64x2_t *cmd2,
1260                                uint64x2_t *cmd3, uint8_t *segdw,
1261                                uint64_t *lmt_addr, __uint128_t *data128,
1262                                uint8_t *shift, const uint16_t flags)
1263 {
1264         uint8_t j, off, lmt_used;
1265
1266         if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1267             !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1268                 /* No segments in 4 consecutive packets. */
1269                 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1270                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1271                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1272                                                            &cmd0[j], &cmd1[j],
1273                                                            segdw[j], flags);
1274                         vst1q_u64(lmt_addr, cmd0[0]);
1275                         vst1q_u64(lmt_addr + 2, cmd1[0]);
1276                         vst1q_u64(lmt_addr + 4, cmd0[1]);
1277                         vst1q_u64(lmt_addr + 6, cmd1[1]);
1278                         vst1q_u64(lmt_addr + 8, cmd0[2]);
1279                         vst1q_u64(lmt_addr + 10, cmd1[2]);
1280                         vst1q_u64(lmt_addr + 12, cmd0[3]);
1281                         vst1q_u64(lmt_addr + 14, cmd1[3]);
1282
1283                         *data128 |= ((__uint128_t)7) << *shift;
1284                         *shift += 3;
1285
1286                         return 1;
1287                 }
1288         }
1289
1290         lmt_used = 0;
1291         for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1292                 /* Fit consecutive packets in same LMTLINE. */
1293                 if ((segdw[j] + segdw[j + 1]) <= 8) {
1294                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1295                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1296                                                            &cmd0[j], &cmd1[j],
1297                                                            segdw[j], flags);
1298                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1299                                                            &cmd0[j + 1],
1300                                                            &cmd1[j + 1],
1301                                                            segdw[j + 1], flags);
1302                                 /* TSTAMP takes 4 each, no segs. */
1303                                 vst1q_u64(lmt_addr, cmd0[j]);
1304                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1305                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1306                                 vst1q_u64(lmt_addr + 6, cmd3[j]);
1307
1308                                 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1309                                 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1310                                 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1311                                 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1312                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1313                                 /* EXT header take 3 each, space for 2 segs.*/
1314                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1315                                                            lmt_addr + 6,
1316                                                            &cmd0[j], &cmd1[j],
1317                                                            segdw[j], flags);
1318                                 vst1q_u64(lmt_addr, cmd0[j]);
1319                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1320                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1321                                 off = segdw[j] - 3;
1322                                 off <<= 1;
1323                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1324                                                            lmt_addr + 12 + off,
1325                                                            &cmd0[j + 1],
1326                                                            &cmd1[j + 1],
1327                                                            segdw[j + 1], flags);
1328                                 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1329                                 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1330                                 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1331                         } else {
1332                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1333                                                            lmt_addr + 4,
1334                                                            &cmd0[j], &cmd1[j],
1335                                                            segdw[j], flags);
1336                                 vst1q_u64(lmt_addr, cmd0[j]);
1337                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1338                                 off = segdw[j] - 2;
1339                                 off <<= 1;
1340                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1341                                                            lmt_addr + 8 + off,
1342                                                            &cmd0[j + 1],
1343                                                            &cmd1[j + 1],
1344                                                            segdw[j + 1], flags);
1345                                 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1346                                 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1347                         }
1348                         *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1349                                     << *shift;
1350                         *shift += 3;
1351                         j += 2;
1352                 } else {
1353                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
1354                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1355                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1356                                                            lmt_addr + 6,
1357                                                            &cmd0[j], &cmd1[j],
1358                                                            segdw[j], flags);
1359                                 vst1q_u64(lmt_addr, cmd0[j]);
1360                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1361                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1362                                 off = segdw[j] - 4;
1363                                 off <<= 1;
1364                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1365                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1366                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1367                                                            lmt_addr + 6,
1368                                                            &cmd0[j], &cmd1[j],
1369                                                            segdw[j], flags);
1370                                 vst1q_u64(lmt_addr, cmd0[j]);
1371                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1372                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1373                         } else {
1374                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1375                                                            lmt_addr + 4,
1376                                                            &cmd0[j], &cmd1[j],
1377                                                            segdw[j], flags);
1378                                 vst1q_u64(lmt_addr, cmd0[j]);
1379                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1380                         }
1381                         *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1382                         *shift += 3;
1383                         j++;
1384                 }
1385                 lmt_used++;
1386                 lmt_addr += 16;
1387         }
1388
1389         return lmt_used;
1390 }
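
/* Note on the *data128/*shift encoding used above: the LMTST dispatch
 * word carries a 3-bit field per LMT line holding that line's dword
 * count minus one, packed upwards from bit 16 (the low 16 bits are
 * presumably reserved for the rest of the dispatch word). E.g. two
 * 4-dword packets sharing one line encode (4 + 4) - 1 = 7 in that
 * field and advance *shift by 3.
 */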
1391
1392 static __rte_always_inline void
1393 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1394                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1395 {
1396         /* Go to next line if we are out of space */
1397         if ((*loff + (dw << 4)) > 128) {
1398                 *data128 = *data128 |
1399                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1400                 *shift = *shift + 3;
1401                 *loff = 0;
1402                 *lnum = *lnum + 1;
1403         }
1404
1405         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1406         *loff = *loff + (dw << 4);
1407 }
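
/* A minimal usage sketch for cn10k_nix_lmt_next() (hypothetical caller
 * state, for illustration only):
 *
 *	uint8_t lnum = 0, loff = 0, shift = 16;
 *	__uint128_t data128 = 0;
 *	uintptr_t laddr;
 *
 *	cn10k_nix_lmt_next(segdw, lmt_base, &lnum, &loff, &shift,
 *			   &data128, &laddr);
 *
 * On return laddr points at the reserved slot and *loff has advanced
 * past it for the next caller. Each LMT line is 128 B, so a command of
 * dw 16-byte dwords consumes (dw << 4) bytes and wraps to the next
 * line once it no longer fits.
 */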
1408
1409 static __rte_always_inline void
1410 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1411                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1412                      uint64x2_t cmd3, const uint16_t flags)
1413 {
1414         uint8_t off;
1415
1416         /* Handle no-fast-free when security is enabled without multi-seg */
1417         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1418             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1419             !(flags & NIX_TX_MULTI_SEG_F)) {
1420                 union nix_send_sg_s sg;
1421
1422                 sg.u = vgetq_lane_u64(cmd1, 0);
1423                 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1424                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
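		/* Bit 55 is presumably the first per-segment don't-free
		 * (i1) flag of SEND_SG: when cnxk_nix_prefree_seg() reports
		 * another reference on the mbuf, HW is told not to free it.
		 */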
1425
1426 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1427                 sg.u = vgetq_lane_u64(cmd1, 0);
1428                 if (!(sg.u & (1ULL << 55)))
1429                         RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1430                                                 0);
1431                 rte_io_wmb();
1432 #endif
1433         }
1434         if (flags & NIX_TX_MULTI_SEG_F) {
1435                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1436                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1437                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1438                                                    &cmd0, &cmd1, segdw, flags);
1439                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1440                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1441                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1442                         off = segdw - 4;
1443                         off <<= 4;
1444                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1445                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1446                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1447                                                    &cmd0, &cmd1, segdw, flags);
1448                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1449                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1450                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1451                 } else {
1452                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1453                                                    &cmd0, &cmd1, segdw, flags);
1454                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1455                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1456                 }
1457         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1458                 /* Store the prepared send desc to LMT lines */
1459                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1460                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1461                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1462                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1463                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1464                 } else {
1465                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1466                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1467                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1468                 }
1469         } else {
1470                 /* Store the prepared send desc to LMT lines */
1471                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1472                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1473         }
1474 }
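
/* Resulting LMT line layout for the single-segment case with EXT and
 * TSTAMP enabled (each vst1q_u64() stores 16 B):
 *
 *	bytes  0..15: cmd0 (SEND_HDR W0:W1)
 *	bytes 16..31: cmd2 (SEND_EXT W0:W1)
 *	bytes 32..47: cmd1 (SEND_SG W0:W1)
 *	bytes 48..63: cmd3 (SEND_MEM W0:W1)
 *
 * For multi-seg, the SG/iova list starts at byte 48 (byte 32 without
 * the EXT header) and cmd3 lands after it at 48 + ((segdw - 4) << 4).
 */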
1475
1476 static __rte_always_inline uint16_t
1477 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1478                            uint16_t pkts, uint64_t *cmd, uintptr_t base,
1479                            const uint16_t flags)
1480 {
1481         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1482         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1483         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1484                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1485         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1486         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1487         uint64x2_t senddesc01_w0, senddesc23_w0;
1488         uint64x2_t senddesc01_w1, senddesc23_w1;
1489         uint64x2_t sendext01_w0, sendext23_w0;
1490         uint64x2_t sendext01_w1, sendext23_w1;
1491         uint64x2_t sendmem01_w0, sendmem23_w0;
1492         uint64x2_t sendmem01_w1, sendmem23_w1;
1493         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1494         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1495         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1496         struct cn10k_eth_txq *txq = tx_queue;
1497         rte_iova_t io_addr = txq->io_addr;
1498         uintptr_t laddr = txq->lmt_base;
1499         uint8_t c_lnum, c_shft, c_loff;
1500         uint64x2_t ltypes01, ltypes23;
1501         uint64x2_t xtmp128, ytmp128;
1502         uint64x2_t xmask01, xmask23;
1503         uintptr_t c_laddr = laddr;
1504         uint8_t lnum, shift, loff;
1505         rte_iova_t c_io_addr;
1506         uint64_t sa_base;
1507         union wdata {
1508                 __uint128_t data128;
1509                 uint64_t data[2];
1510         } wd;
1511
1512         if (!(flags & NIX_TX_VWQE_F)) {
1513                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1514                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1515                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1516                 /* Reduce the cached count */
1517                 txq->fc_cache_pkts -= pkts;
1518         } else {
1519                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1520                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1521         }
1522
1523         /* Perform header writes before barrier for TSO */
1524         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1525                 for (i = 0; i < pkts; i++)
1526                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1527         }
1528
1529         senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1530         senddesc23_w0 = senddesc01_w0;
1531         senddesc01_w1 = vdupq_n_u64(0);
1532         senddesc23_w1 = senddesc01_w1;
1533         sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1534         sgdesc23_w0 = sgdesc01_w0;
1535
1536         /* Load command defaults into vector variables. */
1537         if (flags & NIX_TX_NEED_EXT_HDR) {
1538                 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1539                 sendext23_w0 = sendext01_w0;
1540                 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1541                 sendext23_w1 = sendext01_w1;
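		/* 12 | 12U << 24 presumably seeds both VLAN insert pointers
		 * (VLAN0/VLAN1) to byte offset 12, i.e. immediately after
		 * the Ethernet DA + SA.
		 */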
1542                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1543                         sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1544                         sendmem23_w0 = sendmem01_w0;
1545                         sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1546                         sendmem23_w1 = sendmem01_w1;
1547                 }
1548         }
1549
1550         /* Get LMT base address and LMT ID as lcore id */
1551         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1552         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1553                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1554                 c_io_addr = txq->cpt_io_addr;
1555                 sa_base = txq->sa_base;
1556         }
1557
1558         left = pkts;
1559 again:
1560         /* Number of packets to prepare depends on offloads enabled. */
1561         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1562                               cn10k_nix_pkts_per_vec_brst(flags) :
1563                               left;
1564         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1565                 wd.data128 = 0;
1566                 shift = 16;
1567         }
1568         lnum = 0;
1569         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1570                 loff = 0;
1571                 c_loff = 0;
1572                 c_lnum = 0;
1573                 c_shft = 16;
1574         }
1575
1576         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1577                 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1578                         burst = i;
1579                         break;
1580                 }
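		/* The check above stops the burst early when the CPT LMT
		 * window (presumably 16 lines) may not fit two more
		 * security packets.
		 */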
1581
1582                 if (flags & NIX_TX_MULTI_SEG_F) {
1583                         uint8_t j;
1584
1585                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1586                                 struct rte_mbuf *m = tx_pkts[j];
1587
1588                                 /* Get dwords based on nb_segs. */
1589                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1590                                 /* Add dwords based on offloads. */
1591                                 segdw[j] += 1 + /* SEND HDR */
1592                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1593                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1594                         }
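			/* E.g. a 3-segment mbuf needs 2 dwords of SG/iova
			 * (NIX_NB_SEGS_TO_SEGDW(3) == 2); with EXT and
			 * TSTAMP enabled, segdw = 2 + 1 + 1 + 1 = 5.
			 */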
1595
1596                         /* Check if there are enough LMTLINES for this loop */
1597                         if (lnum + 4 > 32) {
1598                                 uint8_t ldwords_con = 0, lneeded = 0;
1599                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1600                                         ldwords_con += segdw[j];
1601                                         if (ldwords_con > 8) {
1602                                                 lneeded += 1;
1603                                                 ldwords_con = segdw[j];
1604                                         }
1605                                 }
1606                                 lneeded += 1;
1607                                 if (lnum + lneeded > 32) {
1608                                         burst = i;
1609                                         break;
1610                                 }
1611                         }
1612                 }
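		/* Capacity math for the check above: an LMT line holds 8
		 * 16-byte dwords (128 B), hence the per-line wrap at 8
		 * dwords, and the burst window holds 32 lines, hence the
		 * lnum + lneeded > 32 cap.
		 */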
1613                 /* Clear the lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
1614                 senddesc01_w0 =
1615                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1616                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1617
1618                 senddesc23_w0 = senddesc01_w0;
1619                 sgdesc23_w0 = sgdesc01_w0;
1620
1621                 /* Clear vlan enables. */
1622                 if (flags & NIX_TX_NEED_EXT_HDR) {
1623                         sendext01_w1 = vbicq_u64(sendext01_w1,
1624                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1625                         sendext23_w1 = sendext01_w1;
1626                 }
1627
1628                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1629                         /* Reset send mem alg to SETTSTMP from SUB. */
1630                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1631                                                  vdupq_n_u64(BIT_ULL(59)));
1632                         /* Reset send mem address to default. */
1633                         sendmem01_w1 =
1634                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1635                         sendmem23_w0 = sendmem01_w0;
1636                         sendmem23_w1 = sendmem01_w1;
1637                 }
1638
1639                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1640                         /* Clear the LSO enable bit. */
1641                         sendext01_w0 = vbicq_u64(sendext01_w0,
1642                                                  vdupq_n_u64(BIT_ULL(14)));
1643                         sendext23_w0 = sendext01_w0;
1644                 }
1645
1646                 /* Move mbuf pointers to the buf_iova field */
1647                 mbuf0 = (uint64_t *)tx_pkts[0];
1648                 mbuf1 = (uint64_t *)tx_pkts[1];
1649                 mbuf2 = (uint64_t *)tx_pkts[2];
1650                 mbuf3 = (uint64_t *)tx_pkts[3];
1651
1652                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1653                                      offsetof(struct rte_mbuf, buf_iova));
1654                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1655                                      offsetof(struct rte_mbuf, buf_iova));
1656                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1657                                      offsetof(struct rte_mbuf, buf_iova));
1658                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1659                                      offsetof(struct rte_mbuf, buf_iova));
1660                 /*
1661                  * Get each mbuf's ol_flags, iova, pktlen and dataoff:
1662                  * dataoff_iovaX.D[0] = iova,
1663                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1664                  * len_olflagsX.D[0] = ol_flags,
1665                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1666                  */
1667                 dataoff_iova0 = vld1q_u64(mbuf0);
1668                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1669                 dataoff_iova1 = vld1q_u64(mbuf1);
1670                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1671                 dataoff_iova2 = vld1q_u64(mbuf2);
1672                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1673                 dataoff_iova3 = vld1q_u64(mbuf3);
1674                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1675
1676                 /* Move mbuf pointers to the pool field */
1677                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1678                                      offsetof(struct rte_mbuf, pool) -
1679                                      offsetof(struct rte_mbuf, buf_iova));
1680                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1681                                      offsetof(struct rte_mbuf, pool) -
1682                                      offsetof(struct rte_mbuf, buf_iova));
1683                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1684                                      offsetof(struct rte_mbuf, pool) -
1685                                      offsetof(struct rte_mbuf, buf_iova));
1686                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1687                                      offsetof(struct rte_mbuf, pool) -
1688                                      offsetof(struct rte_mbuf, buf_iova));
1689
1690                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1691                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1692                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1693                         /*
1694                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1695                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1696                          */
1697
1698                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1699                                      : [a] "+w"(senddesc01_w1)
1700                                      : [in] "r"(mbuf0 + 2)
1701                                      : "memory");
1702
1703                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1704                                      : [a] "+w"(senddesc01_w1)
1705                                      : [in] "r"(mbuf1 + 2)
1706                                      : "memory");
1707
1708                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1709                                      : [b] "+w"(senddesc23_w1)
1710                                      : [in] "r"(mbuf2 + 2)
1711                                      : "memory");
1712
1713                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1714                                      : [b] "+w"(senddesc23_w1)
1715                                      : [in] "r"(mbuf3 + 2)
1716                                      : "memory");
1717
1718                         /* Get pool pointer alone */
1719                         mbuf0 = (uint64_t *)*mbuf0;
1720                         mbuf1 = (uint64_t *)*mbuf1;
1721                         mbuf2 = (uint64_t *)*mbuf2;
1722                         mbuf3 = (uint64_t *)*mbuf3;
1723                 } else {
1724                         /* Get pool pointer alone */
1725                         mbuf0 = (uint64_t *)*mbuf0;
1726                         mbuf1 = (uint64_t *)*mbuf1;
1727                         mbuf2 = (uint64_t *)*mbuf2;
1728                         mbuf3 = (uint64_t *)*mbuf3;
1729                 }
1730
1731                 const uint8x16_t shuf_mask2 = {
1732                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1733                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1734                 };
1735                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1736                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1737
1738                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1739                 const uint64x2_t and_mask0 = {
1740                         0xFFFFFFFFFFFFFFFF,
1741                         0x000000000000FFFF,
1742                 };
1743
1744                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1745                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1746                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1747                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1748
1749                 /*
1750                  * Pick only 16 bits of pktlen present at bits 63:32
1751                  * and place them at bits 15:0.
1752                  */
1753                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1754                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1755
1756                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1757                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1758                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1759
1760                 /* ORR both sgdesc_w0 and senddesc_w0 with the 16-bit
1761                  * pktlen at bit position 15:0.
1762                  */
1763                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1764                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1765                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1766                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1767
1768                 /* Move pool pointers to point to pool_id. */
1769                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1770                                      offsetof(struct rte_mempool, pool_id));
1771                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1772                                      offsetof(struct rte_mempool, pool_id));
1773                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1774                                      offsetof(struct rte_mempool, pool_id));
1775                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1776                                      offsetof(struct rte_mempool, pool_id));
1777
1778                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1779                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1780                         /*
1781                          * Lookup table to translate ol_flags to
1782                          * il3/il4 types. But we still use the ol3/ol4 fields in
1783                          * senddesc_w1, as only one level of header processing is enabled.
1784                          */
1785                         const uint8x16_t tbl = {
1786                                 /* [0-15] = il4type:il3type */
1787                                 0x04, /* none (IPv6 assumed) */
1788                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1789                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1790                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1791                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1792                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1793                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1794                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1795                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1796                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1797                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1798                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1799                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1800                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1801                                        * RTE_MBUF_F_TX_TCP_CKSUM
1802                                        */
1803                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1804                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1805                                        */
1806                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1807                                        * RTE_MBUF_F_TX_UDP_CKSUM
1808                                        */
1809                         };
1810
1811                         /* Extract olflags to translate to iltypes */
1812                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1813                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1814
1815                         /*
1816                          * E(47):L3_LEN(9):L2_LEN(7+z)
1817                          * E(47):L3_LEN(9):L2_LEN(7+z)
1818                          */
1819                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1820                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1821
1822                         /* Move OLFLAGS bits 55:52 to 51:48,
1823                          * prepending zeros within the byte; the
1824                          * remaining bits are don't-care.
1825                          */
1826                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1827                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1828                         /*
1829                          * E(48):L3_LEN(8):L2_LEN(z+7)
1830                          * E(48):L3_LEN(8):L2_LEN(z+7)
1831                          */
1832                         const int8x16_t tshft3 = {
1833                                 -1, 0, 8, 8, 8, 8, 8, 8,
1834                                 -1, 0, 8, 8, 8, 8, 8, 8,
1835                         };
1836
1837                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1838                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1839
1840                         /* Do the lookup */
1841                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1842                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1843
1844                         /* Pick only the relevant fields, i.e. bits 48:55 of the
1845                          * iltype, and place them in ol3/ol4type of senddesc_w1.
1846                          */
1847                         const uint8x16_t shuf_mask0 = {
1848                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1849                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1850                         };
1851
1852                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1853                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1854
1855                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1856                          * a [E(32):E(16):OL3(8):OL2(8)]
1857                          * a = a + (a << 8)
1858                          * a [E(32):E(16):(OL3+OL2):OL2]
1859                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1860                          */
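			/* E.g. OL2 = 14 and OL3 = 20 yield
			 * OL3PTR = 14 and OL4PTR = 34.
			 */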
1861                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1862                                                  vshlq_n_u16(senddesc01_w1, 8));
1863                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1864                                                  vshlq_n_u16(senddesc23_w1, 8));
1865
1866                         /* Move ltypes to senddesc*_w1 */
1867                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1868                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1869                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1870                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1871                         /*
1872                          * Lookup table to translate ol_flags to
1873                          * ol3/ol4 types.
1874                          */
1875
1876                         const uint8x16_t tbl = {
1877                                 /* [0-15] = ol4type:ol3type */
1878                                 0x00, /* none */
1879                                 0x03, /* OUTER_IP_CKSUM */
1880                                 0x02, /* OUTER_IPV4 */
1881                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1882                                 0x04, /* OUTER_IPV6 */
1883                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1884                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1885                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1886                                        * OUTER_IP_CKSUM
1887                                        */
1888                                 0x00, /* OUTER_UDP_CKSUM */
1889                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1890                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1891                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1892                                        * OUTER_IP_CKSUM
1893                                        */
1894                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1895                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1896                                        * OUTER_IP_CKSUM
1897                                        */
1898                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1899                                        * OUTER_IPV4
1900                                        */
1901                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1902                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1903                                        */
1904                         };
1905
1906                         /* Extract olflags to translate to oltypes */
1907                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1908                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1909
1910                         /*
1911                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1912                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1913                          */
1914                         const uint8x16_t shuf_mask5 = {
1915                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1916                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1917                         };
1918                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1919                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1920
1921                         /* Extract outer ol flags only */
1922                         const uint64x2_t o_cksum_mask = {
1923                                 0x1C00020000000000,
1924                                 0x1C00020000000000,
1925                         };
1926
1927                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1928                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1929
1930                         /* Extract OUTER_UDP_CKSUM bit 41 and
1931                          * move it to bit 61
1932                          */
1933
1934                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1935                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1936
1937                         /* Shift oltype by 2 to start nibble from BIT(56)
1938                          * instead of BIT(58)
1939                          */
1940                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1941                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1942                         /*
1943                          * E(48):L3_LEN(8):L2_LEN(z+7)
1944                          * E(48):L3_LEN(8):L2_LEN(z+7)
1945                          */
1946                         const int8x16_t tshft3 = {
1947                                 -1, 0, 8, 8, 8, 8, 8, 8,
1948                                 -1, 0, 8, 8, 8, 8, 8, 8,
1949                         };
1950
1951                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1952                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1953
1954                         /* Do the lookup */
1955                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1956                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1957
1958                         /* Pick only the relevant fields, i.e. bits 56:63 of the
1959                          * oltype, and place them in ol3/ol4type of senddesc_w1.
1960                          */
1961                         const uint8x16_t shuf_mask0 = {
1962                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1963                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1964                         };
1965
1966                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1967                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1968
1969                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1970                          * a [E(32):E(16):OL3(8):OL2(8)]
1971                          * a = a + (a << 8)
1972                          * a [E(32):E(16):(OL3+OL2):OL2]
1973                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1974                          */
1975                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1976                                                  vshlq_n_u16(senddesc01_w1, 8));
1977                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1978                                                  vshlq_n_u16(senddesc23_w1, 8));
1979
1980                         /* Move ltypes to senddesc*_w1 */
1981                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1982                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1983                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1984                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1985                         /* Lookup table to translate ol_flags to
1986                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1987                          */
1988                         const uint8x16x2_t tbl = {{
1989                                 {
1990                                         /* [0-15] = il4type:il3type */
1991                                         0x04, /* none (IPv6) */
1992                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1993                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1994                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1995                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1996                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1997                                                * RTE_MBUF_F_TX_TCP_CKSUM
1998                                                */
1999                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2000                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2001                                                */
2002                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2003                                                * RTE_MBUF_F_TX_UDP_CKSUM
2004                                                */
2005                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
2006                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
2007                                                * RTE_MBUF_F_TX_TCP_CKSUM
2008                                                */
2009                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
2010                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2011                                                */
2012                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
2013                                                * RTE_MBUF_F_TX_UDP_CKSUM
2014                                                */
2015                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
2016                                                * RTE_MBUF_F_TX_IP_CKSUM
2017                                                */
2018                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2019                                                * RTE_MBUF_F_TX_TCP_CKSUM
2020                                                */
2021                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2022                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2023                                                */
2024                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2025                                                * RTE_MBUF_F_TX_UDP_CKSUM
2026                                                */
2027                                 },
2028
2029                                 {
2030                                         /* [16-31] = ol4type:ol3type */
2031                                         0x00, /* none */
2032                                         0x03, /* OUTER_IP_CKSUM */
2033                                         0x02, /* OUTER_IPV4 */
2034                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2035                                         0x04, /* OUTER_IPV6 */
2036                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2037                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2038                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2039                                                * OUTER_IP_CKSUM
2040                                                */
2041                                         0x00, /* OUTER_UDP_CKSUM */
2042                                         0x33, /* OUTER_UDP_CKSUM |
2043                                                * OUTER_IP_CKSUM
2044                                                */
2045                                         0x32, /* OUTER_UDP_CKSUM |
2046                                                * OUTER_IPV4
2047                                                */
2048                                         0x33, /* OUTER_UDP_CKSUM |
2049                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2050                                                */
2051                                         0x34, /* OUTER_UDP_CKSUM |
2052                                                * OUTER_IPV6
2053                                                */
2054                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2055                                                * OUTER_IP_CKSUM
2056                                                */
2057                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2058                                                * OUTER_IPV4
2059                                                */
2060                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2061                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2062                                                */
2063                                 },
2064                         }};
2065
2066                         /* Extract olflags to translate to oltype & iltype */
2067                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2068                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2069
2070                         /*
2071                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2072                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2073                          */
2074                         const uint32x4_t tshft_4 = {
2075                                 1,
2076                                 0,
2077                                 1,
2078                                 0,
2079                         };
2080                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2081                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2082
2083                         /*
2084                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2085                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2086                          */
2087                         const uint8x16_t shuf_mask5 = {
2088                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2089                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2090                         };
2091                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2092                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2093
2094                         /* Extract outer and inner header ol_flags */
2095                         const uint64x2_t oi_cksum_mask = {
2096                                 0x1CF0020000000000,
2097                                 0x1CF0020000000000,
2098                         };
2099
2100                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2101                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2102
2103                         /* Extract OUTER_UDP_CKSUM bit 41 and
2104                          * move it to bit 61
2105                          */
2106
2107                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2108                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2109
2110                         /* Shift right oltype by 2 and iltype by 4
2111                          * so the oltype nibble starts at BIT(56)
2112                          * instead of BIT(58) and the iltype nibble at
2113                          * BIT(48) instead of BIT(52).
2114                          */
2115                         const int8x16_t tshft5 = {
2116                                 8, 8, 8, 8, 8, 8, -4, -2,
2117                                 8, 8, 8, 8, 8, 8, -4, -2,
2118                         };
2119
2120                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2121                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2122                         /*
2123                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2124                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2125                          */
2126                         const int8x16_t tshft3 = {
2127                                 -1, 0, -1, 0, 0, 0, 0, 0,
2128                                 -1, 0, -1, 0, 0, 0, 0, 0,
2129                         };
2130
2131                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2132                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2133
2134                         /* Mark Bit(4) of oltype to select the [16-31] (ol) half of the table */
2135                         const uint64x2_t oi_cksum_mask2 = {
2136                                 0x1000000000000000,
2137                                 0x1000000000000000,
2138                         };
2139
2140                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2141                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2142
2143                         /* Do the lookup */
2144                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2145                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2146
2147                         /* Pick only the relevant fields, i.e. bits 48:55 of the
2148                          * iltype and bits 56:63 of the oltype, and place them in
2149                          * the corresponding fields of senddesc_w1.
2150                          */
2151                         const uint8x16_t shuf_mask0 = {
2152                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2153                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2154                         };
2155
2156                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2157                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2158
2159                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2160                          * l3len, l2len, ol3len, ol2len.
2161                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2162                          * a = a + (a << 8)
2163                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2164                          * a = a + (a << 16)
2165                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2166                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2167                          */
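			/* E.g. OL2 = 14, OL3 = 20, L2 = 8 and L3 = 20 yield
			 * OL3PTR = 14, OL4PTR = 34, IL3PTR = 42, IL4PTR = 62.
			 */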
2168                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2169                                                  vshlq_n_u32(senddesc01_w1, 8));
2170                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2171                                                  vshlq_n_u32(senddesc23_w1, 8));
2172
2173                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2174                         senddesc01_w1 = vaddq_u8(
2175                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2176                         senddesc23_w1 = vaddq_u8(
2177                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2178
2179                         /* Move ltypes to senddesc*_w1 */
2180                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2181                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2182                 }
2183
2184                 xmask01 = vdupq_n_u64(0);
2185                 xmask23 = xmask01;
2186                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2187                              : [a] "+w"(xmask01)
2188                              : [in] "r"(mbuf0)
2189                              : "memory");
2190
2191                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2192                              : [a] "+w"(xmask01)
2193                              : [in] "r"(mbuf1)
2194                              : "memory");
2195
2196                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2197                              : [b] "+w"(xmask23)
2198                              : [in] "r"(mbuf2)
2199                              : "memory");
2200
2201                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2202                              : [b] "+w"(xmask23)
2203                              : [in] "r"(mbuf3)
2204                              : "memory");
2205                 xmask01 = vshlq_n_u64(xmask01, 20);
2206                 xmask23 = vshlq_n_u64(xmask23, 20);
2207
2208                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2209                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2210
2211                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2212                         /* Tx ol_flag for vlan. */
2213                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2214                         /* Bit enable for VLAN1 */
2215                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2216                         /* Tx ol_flag for QinQ. */
2217                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2218                         /* Bit enable for VLAN0 */
2219                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2220                         /* Load VLAN values from packet; outer is VLAN0. */
2221                         uint64x2_t ext01 = {
2222                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2223                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2224                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2225                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2226                         };
2227                         uint64x2_t ext23 = {
2228                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2229                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2230                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2231                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2232                         };
2233
2234                         /* Get ol_flags of the packets. */
2235                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2236                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2237
2238                         /* ORR vlan outer/inner values into cmd. */
2239                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2240                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2241
2242                         /* Test for offload enable bits and generate masks. */
2243                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2244                                                       mlv),
2245                                             vandq_u64(vtstq_u64(xtmp128, olq),
2246                                                       mlq));
2247                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2248                                                       mlv),
2249                                             vandq_u64(vtstq_u64(ytmp128, olq),
2250                                                       mlq));
2251
2252                         /* Set vlan enable bits into cmd based on mask. */
2253                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2254                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2255                 }
2256
2257                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2258                         /* Tx ol_flag for timestamp. */
2259                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2260                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
2261                         /* Set send mem alg to SUB. */
2262                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2263                         /* Increment send mem address by 8. */
2264                         const uint64x2_t addr = {0x8, 0x8};
2265
2266                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2267                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2268
2269                         /* Check if a timestamp is requested and generate an
2270                          * inverted mask, since no change to the default cmd
2271                          * value is needed when it is set.
2272                          */
2273                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2274                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2275
2276                         /* Change send mem address to an 8 byte offset when
2277                          * TSTMP is disabled.
2278                          */
2279                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2280                                                  vandq_u64(xtmp128, addr));
2281                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2282                                                  vandq_u64(ytmp128, addr));
2283                         /* Change send mem alg to SUB when TSTMP is disabled. */
2284                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2285                                                  vandq_u64(xtmp128, alg));
2286                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2287                                                  vandq_u64(ytmp128, alg));
2288
2289                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2290                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2291                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2292                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
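
			/* Net effect, as far as can be inferred: packets that
			 * do not request TSTMP get alg = SUB with the address
			 * bumped by 8, so the mandatory SEND_MEM write is
			 * diverted away from the live timestamp slot.
			 */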
2293                 }
2294
2295                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2296                         const uint64_t lso_fmt = txq->lso_tun_fmt;
2297                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2298                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2299
2300                         /* Extract SD W1 as we need to set L4 types. */
2301                         vst1q_u64(sd_w1, senddesc01_w1);
2302                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
2303
2304                         /* Extract SX W0 as we need to set LSO fields. */
2305                         vst1q_u64(sx_w0, sendext01_w0);
2306                         vst1q_u64(sx_w0 + 2, sendext23_w0);
2307
2308                         /* Extract ol_flags. */
2309                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2310                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2311
2312                         /* Prepare individual mbufs. */
2313                         cn10k_nix_prepare_tso(tx_pkts[0],
2314                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
2315                                 (union nix_send_ext_w0_u *)&sx_w0[0],
2316                                 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2317
2318                         cn10k_nix_prepare_tso(tx_pkts[1],
2319                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
2320                                 (union nix_send_ext_w0_u *)&sx_w0[1],
2321                                 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2322
2323                         cn10k_nix_prepare_tso(tx_pkts[2],
2324                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
2325                                 (union nix_send_ext_w0_u *)&sx_w0[2],
2326                                 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2327
2328                         cn10k_nix_prepare_tso(tx_pkts[3],
2329                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
2330                                 (union nix_send_ext_w0_u *)&sx_w0[3],
2331                                 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2332
2333                         senddesc01_w1 = vld1q_u64(sd_w1);
2334                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2335
2336                         sendext01_w0 = vld1q_u64(sx_w0);
2337                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
2338                 }
2339
2340                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2341                     !(flags & NIX_TX_MULTI_SEG_F) &&
2342                     !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2343                         /* Set don't free bit if reference count > 1 */
2344                         xmask01 = vdupq_n_u64(0);
2345                         xmask23 = xmask01;
2346
2347                         /* Move mbufs to iova */
2348                         mbuf0 = (uint64_t *)tx_pkts[0];
2349                         mbuf1 = (uint64_t *)tx_pkts[1];
2350                         mbuf2 = (uint64_t *)tx_pkts[2];
2351                         mbuf3 = (uint64_t *)tx_pkts[3];
2352
2353                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2354                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2355                         else
2356                                 RTE_MEMPOOL_CHECK_COOKIES(
2357                                         ((struct rte_mbuf *)mbuf0)->pool,
2358                                         (void **)&mbuf0, 1, 0);
2359
2360                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2361                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2362                         else
2363                                 RTE_MEMPOOL_CHECK_COOKIES(
2364                                         ((struct rte_mbuf *)mbuf1)->pool,
2365                                         (void **)&mbuf1, 1, 0);
2366
2367                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2368                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2369                         else
2370                                 RTE_MEMPOOL_CHECK_COOKIES(
2371                                         ((struct rte_mbuf *)mbuf2)->pool,
2372                                         (void **)&mbuf2, 1, 0);
2373
2374                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2375                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2376                         else
2377                                 RTE_MEMPOOL_CHECK_COOKIES(
2378                                         ((struct rte_mbuf *)mbuf3)->pool,
2379                                         (void **)&mbuf3, 1, 0);
2380                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2381                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
			   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
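		/* senddesc01_w0 holds W0 of pkts 0-1 in lanes 0-1; zipping it
		 * with senddesc01_w1 pairs each packet's W0 with its W1 to
		 * form the contiguous two-dword descriptors cmd0[]/cmd1[].
		 */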
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}

		if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
			const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
						RTE_MBUF_F_TX_SEC_OFFLOAD};
			uintptr_t next;
			uint8_t dw;

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			xtmp128 = vtstq_u64(olf, xtmp128);
			ytmp128 = vtstq_u64(olf, ytmp128);
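			/* vtstq_u64() yields an all-ones lane when the mbuf
			 * has RTE_MBUF_F_TX_SEC_OFFLOAD set; such packets are
			 * steered to the CPT LMT region while the rest get a
			 * NIX LMT slot via cn10k_nix_lmt_next().
			 */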

			/* Process mbuf0 */
			dw = cn10k_nix_tx_dwords(flags, segdw[0]);
			if (vgetq_lane_u64(xtmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
						       &cmd1[0], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf0 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
					     cmd0[0], cmd1[0], cmd2[0], cmd3[0],
					     flags);

			/* Process mbuf1 */
			dw = cn10k_nix_tx_dwords(flags, segdw[1]);
			if (vgetq_lane_u64(xtmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
						       &cmd1[1], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf1 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
					     cmd0[1], cmd1[1], cmd2[1], cmd3[1],
					     flags);

			/* Process mbuf2 */
			dw = cn10k_nix_tx_dwords(flags, segdw[2]);
			if (vgetq_lane_u64(ytmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
						       &cmd1[2], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf2 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
					     cmd0[2], cmd1[2], cmd2[2], cmd3[2],
					     flags);

			/* Process mbuf3 */
			dw = cn10k_nix_tx_dwords(flags, segdw[3]);
			if (vgetq_lane_u64(ytmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
						       &cmd1[3], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf3 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
					     cmd0[3], cmd1[3], cmd2[3], cmd3[3],
					     flags);

		} else if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			segdw[4] = 8;
			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
							  cmd2, cmd3, segdw,
							  (uint64_t *)
							  LMT_OFF(laddr, lnum,
								  0),
							  &wd.data128, &shift,
							  flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
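			/* With a timestamp each packet takes 8 dwords (64B),
			 * i.e. two packets per 128B LMT line; without it the
			 * ext header path takes 6 dwords (48B) per packet.
			 * Either way the four packets span two LMT lines.
			 */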
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Round up lnum to cover a partially filled last LMT line and
	 * record that line's size (in 16B units, minus one) in the
	 * LMTST data word at the current shift position.
	 */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		lnum = lnum + !!loff;
		wd.data128 = wd.data128 |
			(((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
	}

	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
		wd.data[0] >>= 16;

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	left -= burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);

	/* Trigger LMTST */
	if (lnum > 16) {
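		/* A single LMTST flushes at most 16 LMT lines, so larger
		 * bursts are split across two STEOR operations. The low
		 * three bits of the data word are folded into bits [6:4]
		 * of the I/O address and then masked off; the word carries
		 * the extra line count (lines - 1) at bit 12 and the LMT ID
		 * in its low bits.
		 */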
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[1] <<= 16;

		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
							 scalar, cmd, base,
							 flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
						    cmd, base, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
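	/* Stub for builds without the NEON vector path; scalar Tx
	 * routines are used instead.
	 */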
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	RTE_SET_USED(base);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F
#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F

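/* Each T(name, sz, flags) entry below names one Tx fast-path variant:
 * "name" becomes the function suffix, "sz" is the number of dwords the
 * caller must reserve in cmd[], and "flags" is the offload combination.
 * Seven flag bits give 128 modes, listed in groups of sixteen.
 */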
/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES_0_15                                             \
	T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                  \
	T(l3l4csum, 4, L3L4CSUM_F)                                             \
	T(ol3ol4csum, 4, OL3OL4CSUM_F)                                         \
	T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                   \
	T(vlan, 6, VLAN_F)                                                     \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff, 4, NOFF_F)                                                     \
	T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                               \
	T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                           \
	T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_16_31                                            \
	T(tso, 6, TSO_F)                                                       \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff, 6, TSO_F | NOFF_F)                                         \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
	T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_32_47                                            \
	T(ts, 8, TSP_F)                                                        \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff, 8, TSP_F | NOFF_F)                                          \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
	T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_48_63                                            \
	T(ts_tso, 8, TSP_F | TSO_F)                                            \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
	T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
	T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
	T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_64_79                                            \
	T(sec, 4, T_SEC_F)                                                     \
	T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                               \
	T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                           \
	T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff, 4, T_SEC_F | NOFF_F)                                       \
	T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
	T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
	T(sec_noff_ol3ol4csum_l3l4csum, 4,                                     \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
	T(sec_noff_vlan_ol3ol4csum, 6,                                         \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_80_95                                            \
	T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
	T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
	T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_96_111                                           \
	T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
	T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
	T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_112_127                                          \
	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
	T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
	T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
	T(sec_ts_tso_noff_l3l4csum, 8,                                         \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
	T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
	T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
		  L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES                                                  \
	NIX_TX_FASTPATH_MODES_0_15                                             \
	NIX_TX_FASTPATH_MODES_16_31                                            \
	NIX_TX_FASTPATH_MODES_32_47                                            \
	NIX_TX_FASTPATH_MODES_48_63                                            \
	NIX_TX_FASTPATH_MODES_64_79                                            \
	NIX_TX_FASTPATH_MODES_80_95                                            \
	NIX_TX_FASTPATH_MODES_96_111                                           \
	NIX_TX_FASTPATH_MODES_112_127

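/* Expanding NIX_TX_FASTPATH_MODES with the T() definition below emits the
 * four fast-path prototypes for every mode; e.g. T(vlan, 6, VLAN_F)
 * declares cn10k_nix_xmit_pkts_vlan() plus its mseg, vec and vec_mseg
 * variants.
 */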
#define T(name, sz, flags)                                                     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T

#define NIX_TX_XMIT(fn, sz, flags)                                             \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[sz];                                              \
		/* For TSO, inner checksum offload is mandatory */             \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, 0,    \
					   flags);                             \
	}

#define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
		/* For TSO, inner checksum offload is mandatory */             \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,  \
						0,                             \
						flags | NIX_TX_MULTI_SEG_F);   \
	}

#define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[sz];                                              \
		/* For TSO, inner checksum offload is mandatory */             \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts,     \
						  cmd, 0, (flags));            \
	}

#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
		/* For TSO, inner checksum offload is mandatory */             \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_vector(                             \
			tx_queue, tx_pkts, pkts, cmd, 0,                       \
			(flags) | NIX_TX_MULTI_SEG_F);                         \
	}
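
/* A minimal instantiation sketch (hypothetical translation unit): each
 * per-mode Tx source file is expected to pair these helpers with the mode
 * table, e.g.
 *
 *	#define T(name, sz, flags)                                     \
 *		NIX_TX_XMIT(cn10k_nix_xmit_pkts_##name, sz, flags)
 *	NIX_TX_FASTPATH_MODES
 *	#undef T
 */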

#endif /* __CN10K_TX_H__ */