/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flags to control the xmit_prepare function.
 * These are defined from the MSB end so that they are not
 * mistaken for the offload flags used to pick the Tx function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update fc_cache_pkts */                \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply by sqes_per_sqb to express in pkts */      \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
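
/* Worked example (informational, illustrative numbers): with
 * nb_sqb_bufs_adj = 1024, *fc_mem = 1000 and sqes_per_sqb_log2 = 5, the
 * refreshed cache is (1024 - 1000) << 5 = 768 packets worth of SQE room.
 */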

/* Encoded number of segments to number of dwords macro: each value of
 * nb_segs is encoded as a 4-bit nibble.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
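
/* Example (informational): nibble x of NIX_SEGDW_MAGIC holds the number
 * of 16B dwords used by the SG list for x segments; e.g. 3 segments need
 * one SG word plus three IOVA words = 32B, hence nibble 3 is 2.
 */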

/* Function to determine the number of Tx subdescriptors required when the
 * extension subdescriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                       2 :
                       ((flags &
                         (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
                                1 :
                                0);
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Everything is already accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                       8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
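
/* Layout note (informational): in the submitted STEOR data word, bits
 * [11:0] carry the LMT ID, bits [15:12] (number of LMT lines - 1), and
 * each 3-bit field from bit 19 upward (dwords - 1) for lines 1..15;
 * the size for line 0 is moved into I/O address bits [6:4] at submit.
 */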

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                        (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
                        4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags, const uint16_t static_sz)
{
        if (static_sz)
                cmd[0] = txq->send_hdr_w0;
        else
                cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                        cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
                else
                        cmd[2] = NIX_SUBDC_EXT << 60;
                cmd[3] = 0;
                cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        } else {
                cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
}
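
/* Skeleton layout (informational): SEND_HDR (2 words), an optional
 * SEND_EXT (2 words) when an extension subdescriptor is needed, then a
 * SEND_SG word (BIT_ULL(48) encodes a single segment) to be followed by
 * the segment IOVA.
 */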

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update lmtline usage for the partially filled end line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e. 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Record (number of lines - 1) that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint64x2_t cmd01, cmd23;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = vgetq_lane_u8(*cmd0, 8);
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed excludes the L2 header */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }

        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}
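
/* NIXTX placement (informational): the NIX descriptor is stashed in the
 * packet buffer just past the expanded payload, rounded up to the next
 * 128B boundary; CPT_RES_S occupies the first 16B there, which is why
 * the returned address is nixtx + 16.
 */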

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        /* Move to our line from base */
        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = cmd[1] & 0xFF;
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed excludes the L2 header */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
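
/* Worked example (informational): for a plain TCPv4 TSO packet with
 * l2/l3/l4 = 14/20/20 and pkt_len = 1514, lso_sb = 54 and paylen = 1460;
 * the IPv4 total-length field at mdata + 14 + 2 is reduced by 1460 so
 * that hardware can restore per-segment lengths.
 */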

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec)
{
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w1.u = 0;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment by 1 if IPv4, since type 3 is IPv4 with csum */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* When there is no tunnel header, shift the IL3/IL4 fields
                 * down so that OL3/OL4 carry the header checksum info:
                 * pointer fields move right by 16 bits, type fields by 8.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment by 1 if IPv4, since type 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only one header
                 * is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment by 1 if IPv4, since type 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
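                        /* Note (informational): lso_tun_fmt packs eight 8-bit
                         * LSO format indices; shift = 32 * is_udp_tun +
                         * 16 * outer_ipv6 + 8 * inner_ipv6 selects the byte
                         * for this tunnel/IP combination.
                         */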

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent
                         * mbuf is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be on the stack. The intent
         * is that 'cmd' stores content from txq->cmd which is copied only
         * once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp =
                        !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                /* For packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not
                 * set, the Tx tstamp should not be recorded. Hence change
                 * the alg type to NIX_SENDMEMALG_SUB and point the send mem
                 * addr field to the next 8 bytes so that the actual Tx
                 * tstamp registered address is not corrupted.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg =
                        NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
        }
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Round up extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
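
/* Worked example (informational): 4 segments with an EXT header (off = 2)
 * use 1 SG word + 3 IOVAs + 1 SG word + 1 IOVA = 6 words in the SG area,
 * i.e. 3 dwords, plus (off >> 1) + 1 = 2 default dwords, giving segdw = 5
 * (6 when a timestamp subdescriptor is present).
 */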

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at LMT steorl
                 * will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if (flags & NIX_TX_VWQE_F)
                roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

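/* Dispatch note (informational): a burst of up to 32 packets uses up to
 * 32 LMT lines; bursts above 16 are flushed with two STEORLs, lines
 * 0..15 via lmt_id and the remainder via lmt_id + 16.
 */
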
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
                         struct rte_mbuf **tx_pkts, uint16_t pkts,
                         uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at LMT steorl
                 * will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              segdw, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if (flags & NIX_TX_VWQE_F)
                roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                      const uint64_t flags, const uint64_t lso_tun_fmt)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                        0x1;
                uint8_t shift = is_udp_tun ? 32 : 0;

                shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format = (lso_tun_fmt >> shift);
        }
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                union nix_send_hdr_w0_u *sh,
                                union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                           uint64x2_t *cmd1, const uint8_t segdw,
                           const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return;
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        sh.sizem1 = segdw - 1;
        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}

#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
                               uint64x2_t *cmd1, uint64x2_t *cmd2,
                               uint64x2_t *cmd3, uint8_t *segdw,
                               uint64_t *lmt_addr, __uint128_t *data128,
                               uint8_t *shift, const uint16_t flags)
{
        uint8_t j, off, lmt_used;

        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                /* No extra segments in any of the 4 consecutive packets. */
                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
                        for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                        vst1q_u64(lmt_addr, cmd0[0]);
                        vst1q_u64(lmt_addr + 2, cmd1[0]);
                        vst1q_u64(lmt_addr + 4, cmd0[1]);
                        vst1q_u64(lmt_addr + 6, cmd1[1]);
                        vst1q_u64(lmt_addr + 8, cmd0[2]);
                        vst1q_u64(lmt_addr + 10, cmd1[2]);
                        vst1q_u64(lmt_addr + 12, cmd0[3]);
                        vst1q_u64(lmt_addr + 14, cmd1[3]);

                        *data128 |= ((__uint128_t)7) << *shift;
                        *shift += 3;

                        return 1;
                }
        }

        lmt_used = 0;
        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
                /* Fit consecutive packets in same LMTLINE. */
                if ((segdw[j] + segdw[j + 1]) <= 8) {
                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                /* TSTAMP takes 4 dwords each, no extra segs. */
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                vst1q_u64(lmt_addr + 6, cmd3[j]);

                                vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
                                vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                /* EXT header takes 3 each, space for 2 segs. */
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                off = segdw[j] - 3;
                                off <<= 1;
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
                                                           lmt_addr + 12 + off,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
                        } else {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 4,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                off = segdw[j] - 2;
                                off <<= 1;
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
                                                           lmt_addr + 8 + off,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
                        }
                        *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
                                    << *shift;
                        *shift += 3;
                        j += 2;
                } else {
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                off = segdw[j] - 4;
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                        } else {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 4,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                        }
                        *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
                        *shift += 3;
                        j++;
                }
                lmt_used++;
                lmt_addr += 16;
        }

        return lmt_used;
}
1380
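/* Reserve "dw" 16B units on the current 128B LMT line. If the line cannot
 * hold them, close it by recording its dword count (count - 1, 3 bits) into
 * data128 at the current shift and advance to the next line. The address to
 * write at is returned via *next.
 */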
static __rte_always_inline void
cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
		   uint8_t *shift, __uint128_t *data128, uintptr_t *next)
{
	/* Go to next line if we are out of space */
	if ((*loff + (dw << 4)) > 128) {
		*data128 = *data128 |
			   (((__uint128_t)((*loff >> 4) - 1)) << *shift);
		*shift = *shift + 3;
		*loff = 0;
		*lnum = *lnum + 1;
	}
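	/* LMT_OFF(laddr, lnum, loff) addresses byte "loff" of 128B line
	 * "lnum"; e.g. dw = 2 advances loff by 32 bytes.
	 */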
	*next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
	*loff = *loff + (dw << 4);
}

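/* Store one prepared send descriptor to an LMT line in subdescriptor order:
 * SEND_HDR (cmd0), SEND_EXT (cmd2) when an extension header is in use,
 * SEND_SG (cmd1) plus any extra segments, and SEND_MEM (cmd3) last for Tx
 * timestamping.
 */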
static __rte_always_inline void
cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
		     uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
		     uint64x2_t cmd3, const uint16_t flags)
{
	uint8_t off;

	/* Handle no fast free when security is enabled without mseg */
	if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
	    (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
	    !(flags & NIX_TX_MULTI_SEG_F)) {
		union nix_send_sg_s sg;

		sg.u = vgetq_lane_u64(cmd1, 0);
		sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
		cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1, 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
						0);
		rte_io_wmb();
#endif
	}
	if (flags & NIX_TX_MULTI_SEG_F) {
		if ((flags & NIX_TX_NEED_EXT_HDR) &&
		    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
			off = segdw - 4;
			off <<= 4;
			vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
		} else {
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
		}
	} else if (flags & NIX_TX_NEED_EXT_HDR) {
		/* Store the prepared send desc to LMT lines */
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
			vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
		} else {
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
		}
	} else {
		/* Store the prepared send desc to LMT lines */
		vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
		vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
	}
}

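/* Vector Tx burst: packets are prepared four at a time with NEON and the
 * resulting send descriptors are packed into LMT lines, flushed in bursts
 * of up to cn10k_nix_pkts_per_vec_brst(flags) packets.
 */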
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
			   struct rte_mbuf **tx_pkts, uint16_t pkts,
			   uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn10k_eth_txq *txq = tx_queue;
	rte_iova_t io_addr = txq->io_addr;
	uintptr_t laddr = txq->lmt_base;
	uint8_t c_lnum, c_shft, c_loff;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uintptr_t c_laddr = laddr;
	uint8_t lnum, shift, loff;
	rte_iova_t c_io_addr;
	uint64_t sa_base;
	union wdata {
		__uint128_t data128;
		uint64_t data[2];
	} wd;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	} else {
		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
	}

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		for (i = 0; i < pkts; i++)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

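	/* For VWQE, keep only the upper static fields of SEND_HDR_W0 and
	 * re-derive the per-packet descriptor size field (<< 40) from the
	 * extension subdescriptor count.
	 */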
	if (!(flags & NIX_TX_VWQE_F)) {
		senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	} else {
		uint64_t w0 =
			(txq->send_hdr_w0 & 0xFFFFF00000000000) |
			((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);

		senddesc01_w0 = vdupq_n_u64(w0);
	}
	senddesc23_w0 = senddesc01_w0;

	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
	sgdesc23_w0 = sgdesc01_w0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
						   BIT_ULL(15));
			sendmem01_w0 =
				vdupq_n_u64((NIX_SUBDC_MEM << 60) |
					    (NIX_SENDMEMALG_SETTSTMP << 56));
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
			sendmem23_w1 = sendmem01_w1;
		} else {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
		}
		sendext23_w0 = sendext01_w0;

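		/* Default both VLAN insert pointers to byte 12, i.e. right
		 * after DMAC+SMAC; per-packet TCI values and enable bits are
		 * ORed in later from ol_flags.
		 */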
		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
			sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		else
			sendext01_w1 = vdupq_n_u64(0);
		sendext23_w1 = sendext01_w1;
	}

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}
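	/* A separate LMT region (c_laddr) and I/O address are set up above so
	 * that CPT (crypto) submissions stay distinct from NIX submissions.
	 */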

	left = pkts;
again:
	/* Number of packets to prepare depends on offloads enabled. */
	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
			      cn10k_nix_pkts_per_vec_brst(flags) :
			      left;
	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
		wd.data128 = 0;
		shift = 16;
	}
	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		loff = 0;
		c_loff = 0;
		c_lnum = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
			burst = i;
			break;
		}

		if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

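			/* segdw[] counts 16B LMT units per packet, e.g.
			 * nb_segs = 3 maps to 2 units (one SG header dword
			 * plus three pointer dwords).
			 */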
			for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
				struct rte_mbuf *m = tx_pkts[j];

				/* Get dwords based on nb_segs. */
				segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
				/* Add dwords based on offloads. */
				segdw[j] += 1 + /* SEND HDR */
					    !!(flags & NIX_TX_NEED_EXT_HDR) +
					    !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
			}

			/* Check if there are enough LMTLINES for this loop */
			if (lnum + 4 > 32) {
				uint8_t ldwords_con = 0, lneeded = 0;
				for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
					ldwords_con += segdw[j];
					if (ldwords_con > 8) {
						lneeded += 1;
						ldwords_con = segdw[j];
					}
				}
				lneeded += 1;
				if (lnum + lneeded > 32) {
					burst = i;
					break;
				}
			}
		}
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB */
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			/* Clear the LSO enable bit. */
			sendext01_w0 = vbicq_u64(sendext01_w0,
						 vdupq_n_u64(BIT_ULL(14)));
			sendext23_w0 = sendext01_w0;
		}

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Gather each mbuf's ol_flags, iova, pkt_len and data_off:
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->data_off
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		/* Move mbufs to point to pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));

		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only the 16 bits of pktlen present at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
				0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
				0x02, /* RTE_MBUF_F_TX_IPV4 */
				0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
				0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
				0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_TCP_CKSUM
				       */
				0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_SCTP_CKSUM
				       */
				0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte; the rest
			 * is don't care.
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields, i.e. bits 48:55 of
			 * iltype, and place them in ol3/ol4type of senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));
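			/* e.g. OL2 = 14 (Ethernet) and OL3 = 20 (IPv4) give
			 * OL3PTR = 14 and OL4PTR = 34 from this one
			 * 8-bit-lane add.
			 */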

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields, i.e. bits 56:63 of
			 * oltype, and place them in ol3/ol4type of senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
					0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
					0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
					0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
					0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x02, /* RTE_MBUF_F_TX_IPV4 */
					0x12, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x22, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x32, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x03, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_IP_CKSUM
					       */
					0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only the relevant fields, i.e. bits 48:55 of
			 * iltype and bits 56:63 of oltype, and place them in
			 * the corresponding places in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

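		/* Load each mbuf's 16-bit aura handle from the mempool
		 * pool_id (pointed at above) and shift it into place in
		 * SEND_HDR_W0 (the aura field starts at bit 20).
		 */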
		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Tx ol_flag for timestamp. */
			const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
						RTE_MBUF_F_TX_IEEE1588_TMST};
			/* Set send mem alg to SUB. */
			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
			/* Increment send mem address by 8. */
			const uint64x2_t addr = {0x8, 0x8};

			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Check if timestamp is requested and generate inverted
			 * mask as we need not make any changes to default cmd
			 * value.
			 */
			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));

			/* Change send mem address to an 8 byte offset when
			 * TSTMP is disabled.
			 */
			sendmem01_w1 = vaddq_u64(sendmem01_w1,
						 vandq_u64(xtmp128, addr));
			sendmem23_w1 = vaddq_u64(sendmem23_w1,
						 vandq_u64(ytmp128, addr));
			/* Change send mem alg to SUB when TSTMP is disabled. */
			sendmem01_w0 = vorrq_u64(sendmem01_w0,
						 vandq_u64(xtmp128, alg));
			sendmem23_w0 = vorrq_u64(sendmem23_w0,
						 vandq_u64(ytmp128, alg));

			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			const uint64_t lso_fmt = txq->lso_tun_fmt;
			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
			uint64_t sd_w1[NIX_DESCS_PER_LOOP];

			/* Extract SD W1 as we need to set L4 types. */
			vst1q_u64(sd_w1, senddesc01_w1);
			vst1q_u64(sd_w1 + 2, senddesc23_w1);

			/* Extract SX W0 as we need to set LSO fields. */
			vst1q_u64(sx_w0, sendext01_w0);
			vst1q_u64(sx_w0 + 2, sendext23_w0);

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* Prepare individual mbufs. */
			cn10k_nix_prepare_tso(tx_pkts[0],
				(union nix_send_hdr_w1_u *)&sd_w1[0],
				(union nix_send_ext_w0_u *)&sx_w0[0],
				vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[1],
				(union nix_send_hdr_w1_u *)&sd_w1[1],
				(union nix_send_ext_w0_u *)&sx_w0[1],
				vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[2],
				(union nix_send_hdr_w1_u *)&sd_w1[2],
				(union nix_send_ext_w0_u *)&sx_w0[2],
				vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);

			cn10k_nix_prepare_tso(tx_pkts[3],
				(union nix_send_hdr_w1_u *)&sd_w1[3],
				(union nix_send_ext_w0_u *)&sx_w0[3],
				vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);

			senddesc01_w1 = vld1q_u64(sd_w1);
			senddesc23_w1 = vld1q_u64(sd_w1 + 2);

			sendext01_w0 = vld1q_u64(sx_w0);
			sendext23_w0 = vld1q_u64(sx_w0 + 2);
		}

		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
		    !(flags & NIX_TX_MULTI_SEG_F) &&
		    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

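			/* vsetq_lane_u64() returns the updated vector, so the
			 * result must be assigned back for the don't-free bit
			 * (bit 19, 0x80000) to land in xmask01/xmask23.
			 */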
			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
2386                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2387                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
			   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			RTE_MEMPOOL_CHECK_COOKIES(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}
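
		/* Hedged sketch of the idea behind cnxk_nix_prefree_seg()
		 * (the real helper lives elsewhere in the cnxk driver and
		 * also handles indirect mbufs): with a single reference the
		 * buffer may be freed by NIX hardware, otherwise software
		 * drops one reference and sets the SEND HDR W0 don't-free
		 * bit (the 0x80000 lane value above):
		 *
		 *	if (rte_mbuf_refcnt_read(m) == 1)
		 *		return 0;	// NIX frees the mbuf
		 *	rte_mbuf_refcnt_update(m, -1);
		 *	return 1;		// set DF, keep the mbuf
		 */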

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}
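
		/* Worked example of the zips above: with senddesc01_w0 =
		 * [w0(p0), w0(p1)] and senddesc01_w1 = [w1(p0), w1(p1)],
		 * vzip1q_u64() interleaves the low lanes and vzip2q_u64()
		 * the high lanes, producing one complete two-word
		 * descriptor per packet:
		 *
		 *	cmd0[0] = [w0(p0), w1(p0)];
		 *	cmd0[1] = [w0(p1), w1(p1)];
		 */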

		if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
			const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
						RTE_MBUF_F_TX_SEC_OFFLOAD};
			uintptr_t next;
			uint8_t dw;

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			xtmp128 = vtstq_u64(olf, xtmp128);
			ytmp128 = vtstq_u64(olf, ytmp128);

			/* Process mbuf0 */
			dw = cn10k_nix_tx_dwords(flags, segdw[0]);
			if (vgetq_lane_u64(xtmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
						       &cmd1[0], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf0 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
					     cmd0[0], cmd1[0], cmd2[0], cmd3[0],
					     flags);

			/* Process mbuf1 */
			dw = cn10k_nix_tx_dwords(flags, segdw[1]);
			if (vgetq_lane_u64(xtmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
						       &cmd1[1], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf1 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
					     cmd0[1], cmd1[1], cmd2[1], cmd3[1],
					     flags);

			/* Process mbuf2 */
			dw = cn10k_nix_tx_dwords(flags, segdw[2]);
			if (vgetq_lane_u64(ytmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
						       &cmd1[2], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf2 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
					     cmd0[2], cmd1[2], cmd2[2], cmd3[2],
					     flags);

			/* Process mbuf3 */
			dw = cn10k_nix_tx_dwords(flags, segdw[3]);
			if (vgetq_lane_u64(ytmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
						       &cmd1[3], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf3 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
					     cmd0[3], cmd1[3], cmd2[3], cmd3[3],
					     flags);
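
			/* Dispatch summary (restated with hypothetical
			 * helper names, not driver symbols): vtstq_u64()
			 * leaves all-ones in a lane whose ol_flags carries
			 * RTE_MBUF_F_TX_SEC_OFFLOAD, so each packet takes
			 * one of two paths before the common store:
			 *
			 *	for (i = 0; i < 4; i++) {
			 *		if (sec_lane(i))	// CPT inst area
			 *			next = prep_sec(i);
			 *		else			// plain NIX LMT
			 *			next = lmt_slot(i);
			 *		xmit_store(i, next);
			 *	}
			 */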

		} else if (flags & NIX_TX_MULTI_SEG_F) {
			uint8_t j;

			segdw[4] = 8;
			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
							   cmd2, cmd3, segdw,
							   (uint64_t *)
							   LMT_OFF(laddr, lnum,
								   0),
							   &wd.data128, &shift,
							   flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}
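
		/* LMT sizing note (assuming the usual 128 B LMT line):
		 * every vst1q_u64() above writes 16 B (two dwords). Without
		 * the ext header each packet needs cmd0 + cmd1 = 32 B, so
		 * all four packets share one line; with the ext header
		 * (48 B, or 64 B with the timestamp word) only two packets
		 * fit per line, hence the extra "lnum += 1" in those paths:
		 *
		 *	4 pkts * 32 B = 128 B -> 1 line
		 *	4 pkts * 48 B = 192 B -> 2 lines (96 B used per line)
		 *	4 pkts * 64 B = 256 B -> 2 lines
		 */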

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Round up lnum to the last line if it is partial */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		lnum = lnum + !!loff;
		wd.data128 = wd.data128 |
			(((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
	}
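
	/* Encoding note: each 3-bit field folded into wd.data128 holds a
	 * line's size as (number of 16 B units - 1), so a partially filled
	 * line of loff bytes contributes ((loff >> 4) - 1) & 0x7; e.g.
	 * loff = 64 yields field value 3 and a full 128 B line yields 7.
	 */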

	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
		wd.data[0] >>= 16;

	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(ws[0]);

	left -= burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);

	/* Trigger LMTST */
	if (lnum > 16) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[1] <<= 16;

		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}
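
	/* Submit layout as encoded above (for the multi-seg/security
	 * paths; the fixed-size paths get an equivalent precomputed word
	 * from cn10k_nix_tx_steor_vec_data()): the 3-bit size of the
	 * first LMT line travels in bits [6:4] of the I/O address, the
	 * remaining per-line sizes sit at bit 16 and up, (lines - 1) is
	 * placed at bit 12, and the base LMT ID occupies the low bits.
	 * For a full burst of 16 lines:
	 *
	 *	pa   = io_addr | (size0 & 0x7) << 4;
	 *	data = (sizes << 16) | (15ULL << 12) | lmt_id;
	 *	roc_lmt_submit_steorl(data, pa);
	 */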

	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
							 scalar, cmd, flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
						    scalar, cmd, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
			   struct rte_mbuf **tx_pkts, uint16_t pkts,
			   uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(ws);
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F
#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
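
/* Each table below enumerates 16 of the 128 offload-flag combinations;
 * a mode's position equals the bitwise OR of its flags, with L3L4CSUM_F
 * as bit 0 up through T_SEC_F as bit 6. For example:
 *
 *	VLAN_F | L3L4CSUM_F           = 0b0000101 = entry 5  (vlan_l3l4csum)
 *	T_SEC_F | VLAN_F | L3L4CSUM_F = 0b1000101 = entry 69 (sec_vlan_l3l4csum)
 */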

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES_0_15                                             \
	T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                  \
	T(l3l4csum, 4, L3L4CSUM_F)                                             \
	T(ol3ol4csum, 4, OL3OL4CSUM_F)                                         \
	T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                   \
	T(vlan, 6, VLAN_F)                                                     \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff, 4, NOFF_F)                                                     \
	T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                               \
	T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                           \
	T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_16_31                                            \
	T(tso, 6, TSO_F)                                                       \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff, 6, TSO_F | NOFF_F)                                         \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
	T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_32_47                                            \
	T(ts, 8, TSP_F)                                                        \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff, 8, TSP_F | NOFF_F)                                          \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
	T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_48_63                                            \
	T(ts_tso, 8, TSP_F | TSO_F)                                            \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
	T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
	T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
	T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_64_79                                            \
	T(sec, 4, T_SEC_F)                                                     \
	T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                               \
	T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                           \
	T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff, 4, T_SEC_F | NOFF_F)                                       \
	T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
	T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
	T(sec_noff_ol3ol4csum_l3l4csum, 4,                                     \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
	T(sec_noff_vlan_ol3ol4csum, 6,                                         \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_80_95                                            \
	T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
	T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
	T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_96_111                                           \
	T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
	T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
	T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES_112_127                                          \
	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
	T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
	T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
	T(sec_ts_tso_noff_l3l4csum, 8,                                         \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
	T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
	T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
		  L3L4CSUM_F)

#define NIX_TX_FASTPATH_MODES                                                  \
	NIX_TX_FASTPATH_MODES_0_15                                             \
	NIX_TX_FASTPATH_MODES_16_31                                            \
	NIX_TX_FASTPATH_MODES_32_47                                            \
	NIX_TX_FASTPATH_MODES_48_63                                            \
	NIX_TX_FASTPATH_MODES_64_79                                            \
	NIX_TX_FASTPATH_MODES_80_95                                            \
	NIX_TX_FASTPATH_MODES_96_111                                           \
	NIX_TX_FASTPATH_MODES_112_127

#define T(name, sz, flags)                                                     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
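
/* Illustrative expansion (taking the "vlan" entry as an assumed
 * example): the T() macro above declares the four fast-path variants
 * of each mode:
 *
 *	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vlan(
 *		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
 *	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_vlan(...);
 *	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_vlan(...);
 *	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_vlan(...);
 */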

#define NIX_TX_XMIT(fn, sz, flags)                                             \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[sz];                                              \
		/* For TSO inner checksum is a must */                         \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
					   flags);                             \
	}
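
/* Example instantiation (a sketch of how a .c file is expected to use
 * the macro; the "vlan" mode is an assumed example):
 *
 *	NIX_TX_XMIT(cn10k_nix_xmit_pkts_vlan, 6, VLAN_F)
 *
 * This defines the function with cmd[6] on the stack, returns 0 early
 * when TSO is requested without inner L3/L4 checksum, and tail-calls
 * cn10k_nix_xmit_pkts() with VLAN_F as a compile-time constant so the
 * compiler can prune all disabled offload paths.
 */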

#define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
		/* For TSO inner checksum is a must */                         \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
						cmd,                           \
						flags | NIX_TX_MULTI_SEG_F);   \
	}

#define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[sz];                                              \
		/* For TSO inner checksum is a must */                         \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
						  pkts, cmd, (flags));         \
	}

#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
		/* For TSO inner checksum is a must */                         \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_vector(                             \
			tx_queue, NULL, tx_pkts, pkts, cmd,                    \
			(flags) | NIX_TX_MULTI_SEG_F);                         \
	}

#endif /* __CN10K_TX_H__ */