net/cnxk: support Tx security offload on cn10k
drivers/net/cnxk/cn10k_tx.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)

/* Flags to control the xmit_prepare function.
 * Defined from the MSB end to denote that they are not
 * used as offload flags when picking the transmit function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, Update the fc_cache_pkts */            \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply with sqe_per_sqb to express in pkts */     \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check it again for the room */                      \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
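
/* Illustrative example (numbers assumed): with nb_sqb_bufs_adj = 512,
 * *fc_mem = 500 and sqes_per_sqb_log2 = 5, the refresh computes
 * (512 - 500) << 5 = 384, i.e. room for 384 more SQEs, so any burst of
 * up to 384 packets can proceed without re-reading *fc_mem.
 */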

/* Macro mapping an encoded number of segments to a number of dwords;
 * each value of nb_segs is encoded as 4 bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
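
/* Reading the magic constant nibble by nibble from the LSB gives the
 * dword count per nb_segs value: 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7.
 * For example, NIX_NB_SEGS_TO_SEGDW(3) = (0x76654432210ULL >> 12) & 0xF = 2:
 * an SG sub-descriptor plus three pointers fit in two 16B dwords.
 */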

/* Function to determine the number of Tx sub-descriptors required when the
 * extended sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                       2 :
                       ((flags &
                         (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
                                1 :
                                0);
}
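
/* In other words: TSTAMP needs both the ext header and a send mem
 * sub-descriptor (2 extra), VLAN/QINQ or TSO need only the ext header
 * (1 extra), and everything else needs none.
 */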

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Already everything is accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                       8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
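
/* A sketch of the STEOR data word layout as assumed by the callers below:
 * bits [2:0] hold the first LMT line's dword count minus one and are later
 * moved into bits [6:4] of the I/O address, bits [11:0] of the submitted
 * word carry the LMT ID, bits [15:12] the number of lines minus one, and
 * each 3-bit field from bit 19 upward sizes one further line.
 */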

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                        (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
                        4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags)
{
        /* Send hdr */
        cmd[0] = txq->send_hdr_w0;
        cmd[1] = 0;
        cmd += 2;

        /* Send ext if present */
        if (flags & NIX_TX_NEED_EXT_HDR) {
                *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
                cmd += 2;
        }

        /* Send sg */
        cmd[0] = txq->sg_w0;
        cmd[1] = 0;
}
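
/* The skeleton laid out above is, in 64-bit words: send hdr (cmd[0..1]),
 * the optional ext hdr copied from txq->cmd (cmd[2..3]), then the SG
 * sub-descriptor template; per-packet fields are filled in later by
 * cn10k_nix_xmit_prepare().
 */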

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update lmtline usage for the partial end line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e. 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Update lines - 1 that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}
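
/* Illustrative walk-through (values assumed): with lnum = 1 and loff = 1,
 * three CPT instructions were queued, two filling line 0 and one
 * half-filling line 1, so the partial line's size field is rewritten to
 * 0x3 (64B) and the submit word carries lines - 1 = 1.
 */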

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint64x2_t cmd01, cmd23;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = vgetq_lane_u8(*cmd0, 8);
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}
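
/* Each CPT instruction above is 64B, so two fit per 128B LMT line:
 * loff toggles between the two halves, lnum advances once per pair, and
 * shft tracks the 3-bit per-line size field consumed in the STEOR data
 * word built by cn10k_cpt_tx_steor_data().
 */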

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = cmd[1] & 0xFF;
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & PKT_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       PKT_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & PKT_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
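
/* Worked example (numbers assumed): an IPv4/TCP packet with l2_len = 14,
 * l3_len = 20, l4_len = 20 and pkt_len = 9014 gives lso_sb = 54 and
 * paylen = 8960; the IP total-length field at mdata + 14 + 2 is reduced
 * by 8960 so hardware can re-add the per-segment payload size for each
 * emitted segment.
 */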

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec)
{
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w1.u = 0;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /* L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO.
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

                /* When there is no tunnel header, shift the IL3/IL4 fields
                 * into OL3/OL4 so that the lone header's checksum uses
                 * OL3/OL4.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only one header
                 * is present.
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent
                         * mbuf is greater than 1, DF bit = 0 otherwise.
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & PKT_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be on the stack. The intent is
         * that 'cmd' stores the content from txq->cmd, which is copied only
         * once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
                struct nix_send_ext_s *send_hdr_ext =
                        (struct nix_send_ext_s *)(lmt_addr + 16);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
                send_hdr_ext->w0.tstmp = 1;
                if (flags & NIX_TX_MULTI_SEG_F) {
                        /* Retrieving the default desc values */
                        lmt[off] = cmd[2];

                        /* Using compiler barrier to avoid violation of C
                         * aliasing rules.
                         */
                        rte_compiler_barrier();
                }

                /* For packets without PKT_TX_IEEE1588_TMST, the Tx tstamp
                 * must not be recorded; hence change the alg type to
                 * NIX_SENDMEMALG_SET and move the send mem addr field to
                 * the next 8 bytes so the actual registered Tx tstamp
                 * address is not corrupted.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
        }
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= ((uint64_t)cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= ((uint64_t)cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
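
/* Worked example (values assumed): a 3-segment mbuf with the ext header
 * enabled has off = 2; the SG sub-descriptor plus three pointers span
 * four 64-bit words, i.e. two 16B dwords, and (off >> 1) + 1 = 2 more
 * cover the send and ext headers (plus one if TSTAMP is on), giving
 * segdw = 4 and w0.sizem1 = 3.
 */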

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                    uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }

        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags);

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
                                              tx_pkts[i]->ol_flags, 4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if (flags & NIX_TX_VWQE_F)
                roc_sso_hws_head_wait(base);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}
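
/* A burst of up to 32 packets uses up to 32 LMT lines; when more than 16
 * remain after the CPT diversion, they are flushed with two STEORLs
 * (16 lines on the first, burst - 16 on the second), otherwise a single
 * STEORL covers them. Packets diverted to CPT are submitted separately by
 * cn10k_nix_sec_steorl() and reduce the NIX burst count.
 */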

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                         uint16_t pkts, uint64_t *cmd, uintptr_t base,
                         const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        cn10k_nix_tx_skeleton(txq, cmd, flags);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);

                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
                                              tx_pkts[i]->ol_flags, segdw,
                                              flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if (flags & NIX_TX_VWQE_F)
                roc_sso_hws_head_wait(base);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                      const uint64_t flags, const uint64_t lso_tun_fmt)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & PKT_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & PKT_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                        0x1;
                uint8_t shift = is_udp_tun ? 32 : 0;

                shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
                shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format = (lso_tun_fmt >> shift);
        }
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                union nix_send_hdr_w0_u *sh,
                                union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= ((uint64_t)cnxk_nix_prefree_seg(m) << 55);
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= ((uint64_t)cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                           uint64x2_t *cmd1, const uint8_t segdw,
                           const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= ((uint64_t)cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return;
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        sh.sizem1 = segdw - 1;
        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}
1234
1235 #define NIX_DESCS_PER_LOOP 4
1236
1237 static __rte_always_inline uint8_t
1238 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1239                                uint64x2_t *cmd1, uint64x2_t *cmd2,
1240                                uint64x2_t *cmd3, uint8_t *segdw,
1241                                uint64_t *lmt_addr, __uint128_t *data128,
1242                                uint8_t *shift, const uint16_t flags)
1243 {
1244         uint8_t j, off, lmt_used;
1245
1246         if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1247             !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1248                 /* No segments in 4 consecutive packets. */
1249                 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1250                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1251                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1252                                                            &cmd0[j], &cmd1[j],
1253                                                            segdw[j], flags);
1254                         vst1q_u64(lmt_addr, cmd0[0]);
1255                         vst1q_u64(lmt_addr + 2, cmd1[0]);
1256                         vst1q_u64(lmt_addr + 4, cmd0[1]);
1257                         vst1q_u64(lmt_addr + 6, cmd1[1]);
1258                         vst1q_u64(lmt_addr + 8, cmd0[2]);
1259                         vst1q_u64(lmt_addr + 10, cmd1[2]);
1260                         vst1q_u64(lmt_addr + 12, cmd0[3]);
1261                         vst1q_u64(lmt_addr + 14, cmd1[3]);
1262
1263                         *data128 |= ((__uint128_t)7) << *shift;
1264                         shift += 3;
1265
1266                         return 1;
1267                 }
1268         }
1269
1270         lmt_used = 0;
1271         for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1272                 /* Fit consecutive packets in same LMTLINE. */
1273                 if ((segdw[j] + segdw[j + 1]) <= 8) {
1274                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1275                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1276                                                            &cmd0[j], &cmd1[j],
1277                                                            segdw[j], flags);
1278                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1279                                                            &cmd0[j + 1],
1280                                                            &cmd1[j + 1],
1281                                                            segdw[j + 1], flags);
1282                                 /* TSTAMP takes 4 each, no segs. */
1283                                 vst1q_u64(lmt_addr, cmd0[j]);
1284                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1285                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1286                                 vst1q_u64(lmt_addr + 6, cmd3[j]);
1287
1288                                 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1289                                 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1290                                 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1291                                 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1292                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1293                                 /* EXT header take 3 each, space for 2 segs.*/
1294                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1295                                                            lmt_addr + 6,
1296                                                            &cmd0[j], &cmd1[j],
1297                                                            segdw[j], flags);
1298                                 vst1q_u64(lmt_addr, cmd0[j]);
1299                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1300                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1301                                 off = segdw[j] - 3;
1302                                 off <<= 1;
1303                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1304                                                            lmt_addr + 12 + off,
1305                                                            &cmd0[j + 1],
1306                                                            &cmd1[j + 1],
1307                                                            segdw[j + 1], flags);
1308                                 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1309                                 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1310                                 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1311                         } else {
1312                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1313                                                            lmt_addr + 4,
1314                                                            &cmd0[j], &cmd1[j],
1315                                                            segdw[j], flags);
1316                                 vst1q_u64(lmt_addr, cmd0[j]);
1317                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1318                                 off = segdw[j] - 2;
1319                                 off <<= 1;
1320                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1321                                                            lmt_addr + 8 + off,
1322                                                            &cmd0[j + 1],
1323                                                            &cmd1[j + 1],
1324                                                            segdw[j + 1], flags);
1325                                 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1326                                 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1327                         }
1328                         *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1329                                     << *shift;
1330                         *shift += 3;
1331                         j += 2;
1332                 } else {
1333                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
1334                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1335                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1336                                                            lmt_addr + 6,
1337                                                            &cmd0[j], &cmd1[j],
1338                                                            segdw[j], flags);
1339                                 vst1q_u64(lmt_addr, cmd0[j]);
1340                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1341                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1342                                 off = segdw[j] - 4;
1343                                 off <<= 1;
1344                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1345                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1346                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1347                                                            lmt_addr + 6,
1348                                                            &cmd0[j], &cmd1[j],
1349                                                            segdw[j], flags);
1350                                 vst1q_u64(lmt_addr, cmd0[j]);
1351                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1352                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1353                         } else {
1354                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1355                                                            lmt_addr + 4,
1356                                                            &cmd0[j], &cmd1[j],
1357                                                            segdw[j], flags);
1358                                 vst1q_u64(lmt_addr, cmd0[j]);
1359                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1360                         }
1361                         *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1362                         *shift += 3;
1363                         j++;
1364                 }
1365                 lmt_used++;
1366                 lmt_addr += 16;
1367         }
1368
1369         return lmt_used;
1370 }
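
/* Editor's note (not driver code): in the paired path above, two packets
 * share one 128 B LMT line, presumably only when segdw[j] + segdw[j + 1]
 * fits the line's 8 dwords. A single 3-bit "total dwords - 1" field is
 * then OR-ed into *data128 and *shift advances once per line, not per
 * packet.
 */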
1371
1372 static __rte_always_inline void
1373 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1374                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1375 {
1376         /* Go to next line if we are out of space */
1377         if ((*loff + (dw << 4)) > 128) {
1378                 *data128 = *data128 |
1379                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1380                 *shift = *shift + 3;
1381                 *loff = 0;
1382                 *lnum = *lnum + 1;
1383         }
1384
1385         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1386         *loff = *loff + (dw << 4);
1387 }
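
/* Editor's sketch: a hypothetical decoder (not used by the driver) for the
 * LMTST data word built by cn10k_nix_lmt_next(), which packs "dwords - 1"
 * of each completed LMT line into 3-bit fields starting at bit 16.
 */
static __rte_always_inline uint8_t
cn10k_nix_example_line_dwords(__uint128_t data128, uint8_t line)
{
	/* Fields live at shift 16, 19, 22, ... (see *shift += 3 above). */
	return (uint8_t)(((data128 >> (16 + 3 * line)) & 0x7) + 1);
}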
1388
1389 static __rte_always_inline void
1390 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1391                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1392                      uint64x2_t cmd3, const uint16_t flags)
1393 {
1394         uint8_t off;
1395
1396         /* Handle no fast free when security is enabled without mseg */
1397         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1398             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1399             !(flags & NIX_TX_MULTI_SEG_F)) {
1400                 union nix_send_sg_s sg;
1401
1402                 sg.u = vgetq_lane_u64(cmd1, 0);
1403                 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1404                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
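                /* Editor's note: bit 55 is the i1 flag of NIX_SEND_SG_S,
                 * the per-pointer don't-free bit for the first segment;
                 * cnxk_nix_prefree_seg() returns nonzero when hardware
                 * must not free the buffer.
                 */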
1405
1406 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1407                 sg.u = vgetq_lane_u64(cmd1, 0);
1408                 if (!(sg.u & (1ULL << 55)))
1409                         __mempool_check_cookies(mbuf->pool, (void **)&mbuf, 1,
1410                                                 0);
1411                 rte_io_wmb();
1412 #endif
1413         }
1414         if (flags & NIX_TX_MULTI_SEG_F) {
1415                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1416                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1417                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1418                                                    &cmd0, &cmd1, segdw, flags);
1419                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1420                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1421                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1422                         off = segdw - 4;
1423                         off <<= 4;
1424                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1425                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1426                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1427                                                    &cmd0, &cmd1, segdw, flags);
1428                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1429                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1430                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1431                 } else {
1432                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1433                                                    &cmd0, &cmd1, segdw, flags);
1434                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1435                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1436                 }
1437         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1438                 /* Store the prepared send desc to LMT lines */
1439                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1440                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1441                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1442                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1443                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1444                 } else {
1445                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1446                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1447                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1448                 }
1449         } else {
1450                 /* Store the prepared send desc to LMT lines */
1451                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1452                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1453         }
1454 }
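
/* Editor's note on cn10k_nix_xmit_store(): sub-descriptors are written at
 * fixed 16 B offsets within the LMT line. For the single-segment
 * EXT + TSTAMP case:
 *   off  0: SEND_HDR (cmd0)
 *   off 16: SEND_EXT (cmd2)
 *   off 32: SG       (cmd1)
 *   off 48: SEND_MEM (cmd3)
 * In the mseg case the SG chain grows from offset 48 and SEND_MEM is
 * placed in the final dword, at 48 + ((segdw - 4) << 4).
 */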
1455
1456 static __rte_always_inline uint16_t
1457 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1458                            uint16_t pkts, uint64_t *cmd, uintptr_t base,
1459                            const uint16_t flags)
1460 {
1461         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1462         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1463         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1464                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1465         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1466         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1467         uint64x2_t senddesc01_w0, senddesc23_w0;
1468         uint64x2_t senddesc01_w1, senddesc23_w1;
1469         uint64x2_t sendext01_w0, sendext23_w0;
1470         uint64x2_t sendext01_w1, sendext23_w1;
1471         uint64x2_t sendmem01_w0, sendmem23_w0;
1472         uint64x2_t sendmem01_w1, sendmem23_w1;
1473         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1474         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1475         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1476         struct cn10k_eth_txq *txq = tx_queue;
1477         rte_iova_t io_addr = txq->io_addr;
1478         uintptr_t laddr = txq->lmt_base;
1479         uint8_t c_lnum, c_shft, c_loff;
1480         uint64x2_t ltypes01, ltypes23;
1481         uint64x2_t xtmp128, ytmp128;
1482         uint64x2_t xmask01, xmask23;
1483         uintptr_t c_laddr = laddr;
1484         uint8_t lnum, shift, loff;
1485         rte_iova_t c_io_addr;
1486         uint64_t sa_base;
1487         union wdata {
1488                 __uint128_t data128;
1489                 uint64_t data[2];
1490         } wd;
1491
1492         if (!(flags & NIX_TX_VWQE_F)) {
1493                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1494                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1495                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1496                 /* Reduce the cached count */
1497                 txq->fc_cache_pkts -= pkts;
1498         } else {
1499                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1500                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1501         }
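        /* Editor's note: assuming NIX_DESCS_PER_LOOP == 4 (the four-mbuf
         * handling below suggests so), e.g. pkts = 13 leaves scalar = 1
         * for the tail path and rounds the vector count down to 12.
         */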
1502
1503         /* Perform header writes before barrier for TSO */
1504         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1505                 for (i = 0; i < pkts; i++)
1506                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1507         }
1508
1509         senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1510         senddesc23_w0 = senddesc01_w0;
1511         senddesc01_w1 = vdupq_n_u64(0);
1512         senddesc23_w1 = senddesc01_w1;
1513         sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1514         sgdesc23_w0 = sgdesc01_w0;
1515
1516         /* Load command defaults into vector variables. */
1517         if (flags & NIX_TX_NEED_EXT_HDR) {
1518                 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1519                 sendext23_w0 = sendext01_w0;
1520                 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1521                 sendext23_w1 = sendext01_w1;
1522                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1523                         sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1524                         sendmem23_w0 = sendmem01_w0;
1525                         sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1526                         sendmem23_w1 = sendmem01_w1;
1527                 }
1528         }
1529
1530         /* Get LMT base address and LMT ID as lcore id */
1531         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1532         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1533                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1534                 c_io_addr = txq->cpt_io_addr;
1535                 sa_base = txq->sa_base;
1536         }
1537
1538         left = pkts;
1539 again:
1540         /* Number of packets to prepare depends on offloads enabled. */
1541         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1542                               cn10k_nix_pkts_per_vec_brst(flags) :
1543                               left;
1544         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1545                 wd.data128 = 0;
1546                 shift = 16;
1547         }
1548         lnum = 0;
1549         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1550                 loff = 0;
1551                 c_loff = 0;
1552                 c_lnum = 0;
1553                 c_shft = 16;
1554         }
1555
1556         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1557                 if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && (c_lnum + 2) > 16) {
1558                         burst = i;
1559                         break;
1560                 }
1561
1562                 if (flags & NIX_TX_MULTI_SEG_F) {
1563                         uint8_t j;
1564
1565                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1566                                 struct rte_mbuf *m = tx_pkts[j];
1567
1568                                 /* Get dwords based on nb_segs. */
1569                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1570                                 /* Add dwords based on offloads. */
1571                                 segdw[j] += 1 + /* SEND HDR */
1572                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1573                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1574                         }
1575
1576                         /* Check if there are enough LMTLINES for this loop */
1577                         if (lnum + 4 > 32) {
1578                                 uint8_t ldwords_con = 0, lneeded = 0;
1579                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1580                                         ldwords_con += segdw[j];
1581                                         if (ldwords_con > 8) {
1582                                                 lneeded += 1;
1583                                                 ldwords_con = segdw[j];
1584                                         }
1585                                 }
1586                                 lneeded += 1;
1587                                 if (lnum + lneeded > 32) {
1588                                         burst = i;
1589                                         break;
1590                                 }
1591                         }
1592                 }
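                /* Editor's worked example of the line estimate above:
                 * segdw = {8, 8, 8, 8} overflows the 8-dword accumulator
                 * three times and ends with one partial line -> 4 LMT
                 * lines; segdw = {2, 2, 2, 2} packs into a single line.
                 */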
1593                 /* Clear lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
1594                 senddesc01_w0 =
1595                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1596                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1597
1598                 senddesc23_w0 = senddesc01_w0;
1599                 sgdesc23_w0 = sgdesc01_w0;
1600
1601                 /* Clear vlan enables. */
1602                 if (flags & NIX_TX_NEED_EXT_HDR) {
1603                         sendext01_w1 = vbicq_u64(sendext01_w1,
1604                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1605                         sendext23_w1 = sendext01_w1;
1606                 }
1607
1608                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1609                         /* Reset send mem alg to SETTSTMP from SUB */
1610                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1611                                                  vdupq_n_u64(BIT_ULL(59)));
1612                         /* Reset send mem address to default. */
1613                         sendmem01_w1 =
1614                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1615                         sendmem23_w0 = sendmem01_w0;
1616                         sendmem23_w1 = sendmem01_w1;
1617                 }
1618
1619                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1620                         /* Clear the LSO enable bit. */
1621                         sendext01_w0 = vbicq_u64(sendext01_w0,
1622                                                  vdupq_n_u64(BIT_ULL(14)));
1623                         sendext23_w0 = sendext01_w0;
1624                 }
1625
1626                 /* Move mbufs to iova */
1627                 mbuf0 = (uint64_t *)tx_pkts[0];
1628                 mbuf1 = (uint64_t *)tx_pkts[1];
1629                 mbuf2 = (uint64_t *)tx_pkts[2];
1630                 mbuf3 = (uint64_t *)tx_pkts[3];
1631
1632                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1633                                      offsetof(struct rte_mbuf, buf_iova));
1634                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1635                                      offsetof(struct rte_mbuf, buf_iova));
1636                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1637                                      offsetof(struct rte_mbuf, buf_iova));
1638                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1639                                      offsetof(struct rte_mbuf, buf_iova));
1640                 /*
1641                  * Get the mbufs' ol_flags, iova, pktlen, dataoff:
1642                  * dataoff_iovaX.D[0] = iova,
1643                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1644                  * len_olflagsX.D[0] = ol_flags,
1645                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1646                  */
1647                 dataoff_iova0 = vld1q_u64(mbuf0);
1648                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1649                 dataoff_iova1 = vld1q_u64(mbuf1);
1650                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1651                 dataoff_iova2 = vld1q_u64(mbuf2);
1652                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1653                 dataoff_iova3 = vld1q_u64(mbuf3);
1654                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1655
1656                 /* Move mbufs to point to pool */
1657                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1658                                      offsetof(struct rte_mbuf, pool) -
1659                                      offsetof(struct rte_mbuf, buf_iova));
1660                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1661                                      offsetof(struct rte_mbuf, pool) -
1662                                      offsetof(struct rte_mbuf, buf_iova));
1663                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1664                                      offsetof(struct rte_mbuf, pool) -
1665                                      offsetof(struct rte_mbuf, buf_iova));
1666                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1667                                      offsetof(struct rte_mbuf, pool) -
1668                                      offsetof(struct rte_mbuf, buf_iova));
1669
1670                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1671                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1672                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1673                         /*
1674                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1675                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1676                          */
1677
1678                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1679                                      : [a] "+w"(senddesc01_w1)
1680                                      : [in] "r"(mbuf0 + 2)
1681                                      : "memory");
1682
1683                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1684                                      : [a] "+w"(senddesc01_w1)
1685                                      : [in] "r"(mbuf1 + 2)
1686                                      : "memory");
1687
1688                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1689                                      : [b] "+w"(senddesc23_w1)
1690                                      : [in] "r"(mbuf2 + 2)
1691                                      : "memory");
1692
1693                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1694                                      : [b] "+w"(senddesc23_w1)
1695                                      : [in] "r"(mbuf3 + 2)
1696                                      : "memory");
1697
1698                         /* Get pool pointer alone */
1699                         mbuf0 = (uint64_t *)*mbuf0;
1700                         mbuf1 = (uint64_t *)*mbuf1;
1701                         mbuf2 = (uint64_t *)*mbuf2;
1702                         mbuf3 = (uint64_t *)*mbuf3;
1703                 } else {
1704                         /* Get pool pointer alone */
1705                         mbuf0 = (uint64_t *)*mbuf0;
1706                         mbuf1 = (uint64_t *)*mbuf1;
1707                         mbuf2 = (uint64_t *)*mbuf2;
1708                         mbuf3 = (uint64_t *)*mbuf3;
1709                 }
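                /* Editor's sketch (compiled out): the LD1-lane asm above
                 * is equivalent to the lane-load intrinsic, e.g. for lane
                 * 0 of senddesc01_w1 before mbuf0 was advanced to the
                 * pool pointer:
                 */
#if 0
                senddesc01_w1 = vld1q_lane_u64(mbuf0 + 2, senddesc01_w1, 0);
#endif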
1710
1711                 const uint8x16_t shuf_mask2 = {
1712                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1713                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1714                 };
1715                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1716                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1717
1718                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1719                 const uint64x2_t and_mask0 = {
1720                         0xFFFFFFFFFFFFFFFF,
1721                         0x000000000000FFFF,
1722                 };
1723
1724                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1725                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1726                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1727                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1728
1729                 /*
1730                  * Pick only 16 bits of pktlen present at bits 63:32
1731                  * and place them at bits 15:0.
1732                  */
1733                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1734                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1735
1736                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1737                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1738                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1739
1740                 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1741                  * pktlen at 15:0 position.
1742                  */
1743                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1744                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1745                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1746                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1747
1748                 /* Move mbuf to point to pool_id. */
1749                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1750                                      offsetof(struct rte_mempool, pool_id));
1751                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1752                                      offsetof(struct rte_mempool, pool_id));
1753                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1754                                      offsetof(struct rte_mempool, pool_id));
1755                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1756                                      offsetof(struct rte_mempool, pool_id));
1757
1758                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1759                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1760                         /*
1761                          * Lookup table to translate ol_flags to
1762                          * il3/il4 types. But we still use ol3/ol4 types in
1763                          * senddesc_w1 as only one header processing is enabled.
1764                          */
1765                         const uint8x16_t tbl = {
1766                                 /* [0-15] = il4type:il3type */
1767                                 0x04, /* none (IPv6 assumed) */
1768                                 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
1769                                 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
1770                                 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
1771                                 0x03, /* PKT_TX_IP_CKSUM */
1772                                 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
1773                                 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
1774                                 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
1775                                 0x02, /* PKT_TX_IPV4  */
1776                                 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
1777                                 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
1778                                 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
1779                                 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
1780                                 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1781                                        * PKT_TX_TCP_CKSUM
1782                                        */
1783                                 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1784                                        * PKT_TX_SCTP_CKSUM
1785                                        */
1786                                 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1787                                        * PKT_TX_UDP_CKSUM
1788                                        */
1789                         };
1790
1791                         /* Extract olflags to translate to iltypes */
1792                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1793                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1794
1795                         /*
1796                          * E(47):L3_LEN(9):L2_LEN(7+z)
1797                          * E(47):L3_LEN(9):L2_LEN(7+z)
1798                          */
1799                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1800                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1801
1802                         /* Move OLFLAGS bits 55:52 to 51:48
1803                          * with zeros prepended on the byte; the rest
1804                          * are don't-care
1805                          */
1806                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1807                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1808                         /*
1809                          * E(48):L3_LEN(8):L2_LEN(z+7)
1810                          * E(48):L3_LEN(8):L2_LEN(z+7)
1811                          */
1812                         const int8x16_t tshft3 = {
1813                                 -1, 0, 8, 8, 8, 8, 8, 8,
1814                                 -1, 0, 8, 8, 8, 8, 8, 8,
1815                         };
1816
1817                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1818                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1819
1820                         /* Do the lookup */
1821                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1822                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1823
1824                         /* Pick only the relevant field, i.e. bits 48:55 of
1825                          * iltype, and place it in ol3/ol4type of senddesc_w1
1826                          */
1827                         const uint8x16_t shuf_mask0 = {
1828                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1829                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1830                         };
1831
1832                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1833                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1834
1835                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1836                          * a [E(32):E(16):OL3(8):OL2(8)]
1837                          * a = a + (a << 8)
1838                          * a [E(32):E(16):(OL3+OL2):OL2]
1839                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1840                          */
1841                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1842                                                  vshlq_n_u16(senddesc01_w1, 8));
1843                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1844                                                  vshlq_n_u16(senddesc23_w1, 8));
1845
1846                         /* Move ltypes to senddesc*_w1 */
1847                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1848                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1849                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1850                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1851                         /*
1852                          * Lookup table to translate ol_flags to
1853                          * ol3/ol4 types.
1854                          */
1855
1856                         const uint8x16_t tbl = {
1857                                 /* [0-15] = ol4type:ol3type */
1858                                 0x00, /* none */
1859                                 0x03, /* OUTER_IP_CKSUM */
1860                                 0x02, /* OUTER_IPV4 */
1861                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1862                                 0x04, /* OUTER_IPV6 */
1863                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1864                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1865                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1866                                        * OUTER_IP_CKSUM
1867                                        */
1868                                 0x00, /* OUTER_UDP_CKSUM */
1869                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1870                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1871                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1872                                        * OUTER_IP_CKSUM
1873                                        */
1874                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1875                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1876                                        * OUTER_IP_CKSUM
1877                                        */
1878                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1879                                        * OUTER_IPV4
1880                                        */
1881                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1882                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1883                                        */
1884                         };
1885
1886                         /* Extract olflags to translate to iltypes */
1887                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1888                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1889
1890                         /*
1891                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1892                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1893                          */
1894                         const uint8x16_t shuf_mask5 = {
1895                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1896                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1897                         };
1898                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1899                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1900
1901                         /* Extract outer ol flags only */
1902                         const uint64x2_t o_cksum_mask = {
1903                                 0x1C00020000000000,
1904                                 0x1C00020000000000,
1905                         };
1906
1907                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1908                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1909
1910                         /* Extract OUTER_UDP_CKSUM bit 41 and
1911                          * move it to bit 61
1912                          */
1913
1914                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1915                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1916
1917                         /* Shift oltype by 2 to start nibble from BIT(56)
1918                          * instead of BIT(58)
1919                          */
1920                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1921                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1922                         /*
1923                          * E(48):L3_LEN(8):L2_LEN(z+7)
1924                          * E(48):L3_LEN(8):L2_LEN(z+7)
1925                          */
1926                         const int8x16_t tshft3 = {
1927                                 -1, 0, 8, 8, 8, 8, 8, 8,
1928                                 -1, 0, 8, 8, 8, 8, 8, 8,
1929                         };
1930
1931                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1932                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1933
1934                         /* Do the lookup */
1935                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1936                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1937
1938                         /* Pick only the relevant field, i.e. bits 56:63 of
1939                          * oltype, and place it in ol3/ol4type of senddesc_w1
1940                          */
1941                         const uint8x16_t shuf_mask0 = {
1942                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1943                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1944                         };
1945
1946                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1947                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1948
1949                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1950                          * a [E(32):E(16):OL3(8):OL2(8)]
1951                          * a = a + (a << 8)
1952                          * a [E(32):E(16):(OL3+OL2):OL2]
1953                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1954                          */
1955                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1956                                                  vshlq_n_u16(senddesc01_w1, 8));
1957                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1958                                                  vshlq_n_u16(senddesc23_w1, 8));
1959
1960                         /* Move ltypes to senddesc*_w1 */
1961                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1962                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1963                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1964                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1965                         /* Lookup table to translate ol_flags to
1966                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1967                          */
1968                         const uint8x16x2_t tbl = {{
1969                                 {
1970                                         /* [0-15] = il4type:il3type */
1971                                         0x04, /* none (IPv6) */
1972                                         0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1973                                         0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1974                                         0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1975                                         0x03, /* PKT_TX_IP_CKSUM */
1976                                         0x13, /* PKT_TX_IP_CKSUM |
1977                                                * PKT_TX_TCP_CKSUM
1978                                                */
1979                                         0x23, /* PKT_TX_IP_CKSUM |
1980                                                * PKT_TX_SCTP_CKSUM
1981                                                */
1982                                         0x33, /* PKT_TX_IP_CKSUM |
1983                                                * PKT_TX_UDP_CKSUM
1984                                                */
1985                                         0x02, /* PKT_TX_IPV4 */
1986                                         0x12, /* PKT_TX_IPV4 |
1987                                                * PKT_TX_TCP_CKSUM
1988                                                */
1989                                         0x22, /* PKT_TX_IPV4 |
1990                                                * PKT_TX_SCTP_CKSUM
1991                                                */
1992                                         0x32, /* PKT_TX_IPV4 |
1993                                                * PKT_TX_UDP_CKSUM
1994                                                */
1995                                         0x03, /* PKT_TX_IPV4 |
1996                                                * PKT_TX_IP_CKSUM
1997                                                */
1998                                         0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1999                                                * PKT_TX_TCP_CKSUM
2000                                                */
2001                                         0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2002                                                * PKT_TX_SCTP_CKSUM
2003                                                */
2004                                         0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2005                                                * PKT_TX_UDP_CKSUM
2006                                                */
2007                                 },
2008
2009                                 {
2010                                         /* [16-31] = ol4type:ol3type */
2011                                         0x00, /* none */
2012                                         0x03, /* OUTER_IP_CKSUM */
2013                                         0x02, /* OUTER_IPV4 */
2014                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2015                                         0x04, /* OUTER_IPV6 */
2016                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2017                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2018                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2019                                                * OUTER_IP_CKSUM
2020                                                */
2021                                         0x00, /* OUTER_UDP_CKSUM */
2022                                         0x33, /* OUTER_UDP_CKSUM |
2023                                                * OUTER_IP_CKSUM
2024                                                */
2025                                         0x32, /* OUTER_UDP_CKSUM |
2026                                                * OUTER_IPV4
2027                                                */
2028                                         0x33, /* OUTER_UDP_CKSUM |
2029                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2030                                                */
2031                                         0x34, /* OUTER_UDP_CKSUM |
2032                                                * OUTER_IPV6
2033                                                */
2034                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2035                                                * OUTER_IP_CKSUM
2036                                                */
2037                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2038                                                * OUTER_IPV4
2039                                                */
2040                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2041                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2042                                                */
2043                                 },
2044                         }};
2045
2046                         /* Extract olflags to translate to oltype & iltype */
2047                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2048                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2049
2050                         /*
2051                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2052                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2053                          */
2054                         const uint32x4_t tshft_4 = {
2055                                 1,
2056                                 0,
2057                                 1,
2058                                 0,
2059                         };
2060                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2061                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2062
2063                         /*
2064                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2065                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2066                          */
2067                         const uint8x16_t shuf_mask5 = {
2068                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2069                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2070                         };
2071                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2072                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2073
2074                         /* Extract outer and inner header ol_flags */
2075                         const uint64x2_t oi_cksum_mask = {
2076                                 0x1CF0020000000000,
2077                                 0x1CF0020000000000,
2078                         };
2079
2080                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2081                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2082
2083                         /* Extract OUTER_UDP_CKSUM bit 41 and
2084                          * move it to bit 61
2085                          */
2086
2087                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2088                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2089
2090                         /* Shift right oltype by 2 and iltype by 4
2091                          * to start oltype nibble from BIT(56)
2092                          * instead of BIT(58) and iltype nibble from BIT(48)
2093                          * instead of BIT(52).
2094                          */
2095                         const int8x16_t tshft5 = {
2096                                 8, 8, 8, 8, 8, 8, -4, -2,
2097                                 8, 8, 8, 8, 8, 8, -4, -2,
2098                         };
2099
2100                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2101                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2102                         /*
2103                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2104                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2105                          */
2106                         const int8x16_t tshft3 = {
2107                                 -1, 0, -1, 0, 0, 0, 0, 0,
2108                                 -1, 0, -1, 0, 0, 0, 0, 0,
2109                         };
2110
2111                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2112                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2113
2114                         /* Mark bit 4 of oltype to index the ol half [16-31] of tbl */
2115                         const uint64x2_t oi_cksum_mask2 = {
2116                                 0x1000000000000000,
2117                                 0x1000000000000000,
2118                         };
2119
2120                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2121                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2122
2123                         /* Do the lookup */
2124                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2125                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2126
2127                         /* Pick only the relevant fields, i.e. bits 48:55 of
2128                          * iltype and bits 56:63 of oltype, and place them in
2129                          * the corresponding fields of senddesc_w1.
2130                          */
2131                         const uint8x16_t shuf_mask0 = {
2132                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2133                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2134                         };
2135
2136                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2137                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2138
2139                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2140                          * l3len, l2len, ol3len, ol2len.
2141                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2142                          * a = a + (a << 8)
2143                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2144                          * a = a + (a << 16)
2145                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2146                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2147                          */
2148                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2149                                                  vshlq_n_u32(senddesc01_w1, 8));
2150                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2151                                                  vshlq_n_u32(senddesc23_w1, 8));
2152
2153                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2154                         senddesc01_w1 = vaddq_u8(
2155                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2156                         senddesc23_w1 = vaddq_u8(
2157                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2158
2159                         /* Move ltypes to senddesc*_w1 */
2160                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2161                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2162                 }
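                /* Editor's worked example of the "a + (a << 8)" pointer
                 * math used in all three branches above: with OL2_LEN = 14
                 * and OL3_LEN = 20, a = 0x140E and a + (a << 8) = 0x220E,
                 * i.e. OL3PTR = 14 and OL4PTR = 34 in consecutive bytes.
                 */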
2163
2164                 xmask01 = vdupq_n_u64(0);
2165                 xmask23 = xmask01;
2166                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2167                              : [a] "+w"(xmask01)
2168                              : [in] "r"(mbuf0)
2169                              : "memory");
2170
2171                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2172                              : [a] "+w"(xmask01)
2173                              : [in] "r"(mbuf1)
2174                              : "memory");
2175
2176                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2177                              : [b] "+w"(xmask23)
2178                              : [in] "r"(mbuf2)
2179                              : "memory");
2180
2181                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2182                              : [b] "+w"(xmask23)
2183                              : [in] "r"(mbuf3)
2184                              : "memory");
2185                 xmask01 = vshlq_n_u64(xmask01, 20);
2186                 xmask23 = vshlq_n_u64(xmask23, 20);
2187
2188                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2189                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2190
2191                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2192                         /* Tx ol_flag for vlan. */
2193                         const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
2194                         /* Bit enable for VLAN1 */
2195                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2196                         /* Tx ol_flag for QinQ. */
2197                         const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
2198                         /* Bit enable for VLAN0 */
2199                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2200                         /* Load VLAN values from packet. Outer is VLAN 0 */
2201                         uint64x2_t ext01 = {
2202                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2203                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2204                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2205                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2206                         };
2207                         uint64x2_t ext23 = {
2208                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2209                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2210                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2211                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2212                         };
2213
2214                         /* Get ol_flags of the packets. */
2215                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2216                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2217
2218                         /* ORR vlan outer/inner values into cmd. */
2219                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2220                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2221
2222                         /* Test for offload enable bits and generate masks. */
2223                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2224                                                       mlv),
2225                                             vandq_u64(vtstq_u64(xtmp128, olq),
2226                                                       mlq));
2227                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2228                                                       mlv),
2229                                             vandq_u64(vtstq_u64(ytmp128, olq),
2230                                                       mlq));
2231
2232                         /* Set vlan enable bits into cmd based on mask. */
2233                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2234                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2235                 }
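                /* Editor's note: in SEND_EXT W1 as built above, the outer
                 * TCI lands at bits 23:8 (VLAN0) and the inner TCI at bits
                 * 47:32 (VLAN1); bits 48/49 are the respective insertion
                 * enables, set only when PKT_TX_QINQ / PKT_TX_VLAN are
                 * present in ol_flags.
                 */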
2236
2237                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2238                         /* Tx ol_flag for timestamp. */
2239                         const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
2240                                                 PKT_TX_IEEE1588_TMST};
2241                         /* Set send mem alg to SUB. */
2242                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2243                         /* Increment send mem address by 8. */
2244                         const uint64x2_t addr = {0x8, 0x8};
2245
2246                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2247                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2248
2249                         /* Check if timestamp is requested and generate inverted
2250                          * mask as we need not make any changes to default cmd
2251                          * value.
2252                          */
2253                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2254                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2255
2256                         /* Change send mem address to an 8 byte offset when
2257                          * TSTMP is disabled.
2258                          */
2259                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2260                                                  vandq_u64(xtmp128, addr));
2261                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2262                                                  vandq_u64(ytmp128, addr));
2263                         /* Change send mem alg to SUB when TSTMP is disabled. */
2264                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2265                                                  vandq_u64(xtmp128, alg));
2266                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2267                                                  vandq_u64(ytmp128, alg));
2268
2269                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2270                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2271                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2272                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2273                 }
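                /* Editor's note: the inverted mask means only lanes
                 * *without* PKT_TX_IEEE1588_TMST are touched; their
                 * SEND_MEM gets alg = SUB and an address 8 B away,
                 * presumably a harmless scratch update, while timestamp
                 * lanes keep the default SETTSTMP command from txq->cmd[].
                 */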
2274
2275                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2276                         const uint64_t lso_fmt = txq->lso_tun_fmt;
2277                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2278                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2279
2280                         /* Extract SD W1 as we need to set L4 types. */
2281                         vst1q_u64(sd_w1, senddesc01_w1);
2282                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
2283
2284                         /* Extract SX W0 as we need to set LSO fields. */
2285                         vst1q_u64(sx_w0, sendext01_w0);
2286                         vst1q_u64(sx_w0 + 2, sendext23_w0);
2287
2288                         /* Extract ol_flags. */
2289                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2290                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2291
2292                         /* Prepare individual mbufs. */
2293                         cn10k_nix_prepare_tso(tx_pkts[0],
2294                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
2295                                 (union nix_send_ext_w0_u *)&sx_w0[0],
2296                                 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2297
2298                         cn10k_nix_prepare_tso(tx_pkts[1],
2299                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
2300                                 (union nix_send_ext_w0_u *)&sx_w0[1],
2301                                 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2302
2303                         cn10k_nix_prepare_tso(tx_pkts[2],
2304                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
2305                                 (union nix_send_ext_w0_u *)&sx_w0[2],
2306                                 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2307
2308                         cn10k_nix_prepare_tso(tx_pkts[3],
2309                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
2310                                 (union nix_send_ext_w0_u *)&sx_w0[3],
2311                                 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2312
2313                         senddesc01_w1 = vld1q_u64(sd_w1);
2314                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2315
2316                         sendext01_w0 = vld1q_u64(sx_w0);
2317                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
2318                 }
2319
2320                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2321                     !(flags & NIX_TX_MULTI_SEG_F) &&
2322                     !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2323                         /* Set don't-free (DF) bit in SEND_HDR_W0 if
2324                          * reference count > 1
2325                          */
2324                         xmask01 = vdupq_n_u64(0);
2325                         xmask23 = xmask01;
2326
2327                         /* Move mbufs to iova */
2328                         mbuf0 = (uint64_t *)tx_pkts[0];
2329                         mbuf1 = (uint64_t *)tx_pkts[1];
2330                         mbuf2 = (uint64_t *)tx_pkts[2];
2331                         mbuf3 = (uint64_t *)tx_pkts[3];
2332
2333                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2334                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2335                         else
2336                                 __mempool_check_cookies(
2337                                         ((struct rte_mbuf *)mbuf0)->pool,
2338                                         (void **)&mbuf0, 1, 0);
2339
2340                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2341                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2342                         else
2343                                 __mempool_check_cookies(
2344                                         ((struct rte_mbuf *)mbuf1)->pool,
2345                                         (void **)&mbuf1, 1, 0);
2346
2347                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2348                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2349                         else
2350                                 __mempool_check_cookies(
2351                                         ((struct rte_mbuf *)mbuf2)->pool,
2352                                         (void **)&mbuf2, 1, 0);
2353
2354                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2355                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2356                         else
2357                                 __mempool_check_cookies(
2358                                         ((struct rte_mbuf *)mbuf3)->pool,
2359                                         (void **)&mbuf3, 1, 0);
2360                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2361                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
			   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool objects as "put" since
			 * they are freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

		if (flags & NIX_TX_NEED_EXT_HDR) {
			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
		}

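		/* Security path: test PKT_TX_SEC_OFFLOAD on each of the
		 * four packets. A secure packet is converted into a CPT
		 * instruction and lands in the CPT LMT region, while a
		 * plain packet takes the next free slot in the NIX LMT
		 * region; cn10k_nix_xmit_store() then writes the prepared
		 * command words to whichever area was picked.
		 */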
		if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
			const uint64x2_t olf = {PKT_TX_SEC_OFFLOAD,
						PKT_TX_SEC_OFFLOAD};
			uintptr_t next;
			uint8_t dw;

			/* Extract ol_flags. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			xtmp128 = vtstq_u64(olf, xtmp128);
			ytmp128 = vtstq_u64(olf, ytmp128);

			/* Process mbuf0 */
			dw = cn10k_nix_tx_dwords(flags, segdw[0]);
			if (vgetq_lane_u64(xtmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
						       &cmd1[0], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf0 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
					     cmd0[0], cmd1[0], cmd2[0], cmd3[0],
					     flags);

			/* Process mbuf1 */
			dw = cn10k_nix_tx_dwords(flags, segdw[1]);
			if (vgetq_lane_u64(xtmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
						       &cmd1[1], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf1 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
					     cmd0[1], cmd1[1], cmd2[1], cmd3[1],
					     flags);

			/* Process mbuf2 */
			dw = cn10k_nix_tx_dwords(flags, segdw[2]);
			if (vgetq_lane_u64(ytmp128, 0))
				cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
						       &cmd1[2], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf2 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
					     cmd0[2], cmd1[2], cmd2[2], cmd3[2],
					     flags);

			/* Process mbuf3 */
			dw = cn10k_nix_tx_dwords(flags, segdw[3]);
			if (vgetq_lane_u64(ytmp128, 1))
				cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
						       &cmd1[3], &next, c_laddr,
						       &c_lnum, &c_loff,
						       &c_shft, sa_base, flags);
			else
				cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
						   &shift, &wd.data128, &next);

			/* Store mbuf3 to LMTLINE/CPT NIXTX area */
			cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
					     cmd0[3], cmd1[3], cmd2[3], cmd3[3],
					     flags);
		} else if (flags & NIX_TX_MULTI_SEG_F) {
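			/* Multi-seg: pack all four commands into the LMT
			 * lines in one shot; the helper records each line's
			 * dword count in wd.data128 and returns the number
			 * of lines consumed.
			 */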
			uint8_t j;

			segdw[4] = 8;
			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
							   cmd2, cmd3, segdw,
							   (uint64_t *)
							   LMT_OFF(laddr, lnum,
								   0),
							   &wd.data128, &shift,
							   flags);
			lnum += j;
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* Store the prepared send desc to LMT lines */
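			/* Two packets per 128B LMT line, words in the order
			 * the HW expects: send header, extension header, SG,
			 * and the timestamp word when enabled.
			 */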
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
			} else {
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
				lnum += 1;
				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
				vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
				vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
			}
			lnum += 1;
		} else {
			/* Store the prepared send desc to LMT lines */
			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
			lnum += 1;
		}

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Round up lnum to the last line if it is partially filled */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		lnum = lnum + !!loff;
		wd.data128 = wd.data128 |
			(((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
	}

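	/* For mseg/security the per-LMT-line sizes were packed into
	 * wd.data128 starting at bit 16 (see the fill loop above), so
	 * shift them down before composing the LMTST data word.
	 */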
	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
		wd.data[0] >>= 16;

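	/* When called from the event device (VWQE), wait for the SSO
	 * head so that packets are released to the wire in order.
	 */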
	if (flags & NIX_TX_VWQE_F)
		roc_sso_hws_head_wait(base);

	left -= burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);

	/* Trigger LMTST */
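	/* A vector burst can use more than 16 LMT lines, but one STEOR
	 * releases at most 16, so split into STEOR0/STEOR1 when needed.
	 * In the LMTST data word the low bits carry the LMT ID and bits
	 * 12+ the line count minus one; the 3-bit size of the first line
	 * is carved off into bits [6:4] of the I/O address.
	 */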
	if (lnum > 16) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= (15ULL << 12);
		wd.data[0] |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);

		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[1] & 0x7) << 4;
		wd.data[1] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[1] <<= 16;

		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
		wd.data[1] |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(wd.data[1], pa);
	} else if (lnum) {
		if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);

		pa = io_addr | (wd.data[0] & 0x7) << 4;
		wd.data[0] &= ~0x7ULL;

		if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
			wd.data[0] <<= 16;

		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
		wd.data[0] |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(wd.data[0], pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

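	/* Packets left over after the last full NIX_DESCS_PER_LOOP
	 * group go out through the scalar routines.
	 */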
	if (unlikely(scalar)) {
		if (flags & NIX_TX_MULTI_SEG_F)
			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
							 scalar, cmd, base,
							 flags);
		else
			pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
						    cmd, base, flags);
	}

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, uintptr_t base,
			   const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	RTE_SET_USED(base);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F
#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F

/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
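/* Each T() entry gives the function name suffix, a 0/1 column per offload
 * flag in the order above, the Tx command size in 64-bit words (4 covers
 * send header + SG, 6 adds the extension header, 8 adds the timestamp),
 * and the combined offload flag mask.
 */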
#define NIX_TX_FASTPATH_MODES                                           \
T(no_offload,                           0, 0, 0, 0, 0, 0, 0,    4,      \
		NIX_TX_OFFLOAD_NONE)                                    \
T(l3l4csum,                             0, 0, 0, 0, 0, 0, 1,    4,      \
		L3L4CSUM_F)                                             \
T(ol3ol4csum,                           0, 0, 0, 0, 0, 1, 0,    4,      \
		OL3OL4CSUM_F)                                           \
T(ol3ol4csum_l3l4csum,                  0, 0, 0, 0, 0, 1, 1,    4,      \
		OL3OL4CSUM_F | L3L4CSUM_F)                              \
T(vlan,                                 0, 0, 0, 0, 1, 0, 0,    6,      \
		VLAN_F)                                                 \
T(vlan_l3l4csum,                        0, 0, 0, 0, 1, 0, 1,    6,      \
		VLAN_F | L3L4CSUM_F)                                    \
T(vlan_ol3ol4csum,                      0, 0, 0, 0, 1, 1, 0,    6,      \
		VLAN_F | OL3OL4CSUM_F)                                  \
T(vlan_ol3ol4csum_l3l4csum,             0, 0, 0, 0, 1, 1, 1,    6,      \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(noff,                                 0, 0, 0, 1, 0, 0, 0,    4,      \
		NOFF_F)                                                 \
T(noff_l3l4csum,                        0, 0, 0, 1, 0, 0, 1,    4,      \
		NOFF_F | L3L4CSUM_F)                                    \
T(noff_ol3ol4csum,                      0, 0, 0, 1, 0, 1, 0,    4,      \
		NOFF_F | OL3OL4CSUM_F)                                  \
T(noff_ol3ol4csum_l3l4csum,             0, 0, 0, 1, 0, 1, 1,    4,      \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(noff_vlan,                            0, 0, 0, 1, 1, 0, 0,    6,      \
		NOFF_F | VLAN_F)                                        \
T(noff_vlan_l3l4csum,                   0, 0, 0, 1, 1, 0, 1,    6,      \
		NOFF_F | VLAN_F | L3L4CSUM_F)                           \
T(noff_vlan_ol3ol4csum,                 0, 0, 0, 1, 1, 1, 0,    6,      \
		NOFF_F | VLAN_F | OL3OL4CSUM_F)                         \
T(noff_vlan_ol3ol4csum_l3l4csum,        0, 0, 0, 1, 1, 1, 1,    6,      \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(tso,                                  0, 0, 1, 0, 0, 0, 0,    6,      \
		TSO_F)                                                  \
T(tso_l3l4csum,                         0, 0, 1, 0, 0, 0, 1,    6,      \
		TSO_F | L3L4CSUM_F)                                     \
T(tso_ol3ol4csum,                       0, 0, 1, 0, 0, 1, 0,    6,      \
		TSO_F | OL3OL4CSUM_F)                                   \
T(tso_ol3ol4csum_l3l4csum,              0, 0, 1, 0, 0, 1, 1,    6,      \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
T(tso_vlan,                             0, 0, 1, 0, 1, 0, 0,    6,      \
		TSO_F | VLAN_F)                                         \
T(tso_vlan_l3l4csum,                    0, 0, 1, 0, 1, 0, 1,    6,      \
		TSO_F | VLAN_F | L3L4CSUM_F)                            \
T(tso_vlan_ol3ol4csum,                  0, 0, 1, 0, 1, 1, 0,    6,      \
		TSO_F | VLAN_F | OL3OL4CSUM_F)                          \
T(tso_vlan_ol3ol4csum_l3l4csum,         0, 0, 1, 0, 1, 1, 1,    6,      \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(tso_noff,                             0, 0, 1, 1, 0, 0, 0,    6,      \
		TSO_F | NOFF_F)                                         \
T(tso_noff_l3l4csum,                    0, 0, 1, 1, 0, 0, 1,    6,      \
		TSO_F | NOFF_F | L3L4CSUM_F)                            \
T(tso_noff_ol3ol4csum,                  0, 0, 1, 1, 0, 1, 0,    6,      \
		TSO_F | NOFF_F | OL3OL4CSUM_F)                          \
T(tso_noff_ol3ol4csum_l3l4csum,         0, 0, 1, 1, 0, 1, 1,    6,      \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(tso_noff_vlan,                        0, 0, 1, 1, 1, 0, 0,    6,      \
		TSO_F | NOFF_F | VLAN_F)                                \
T(tso_noff_vlan_l3l4csum,               0, 0, 1, 1, 1, 0, 1,    6,      \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
T(tso_noff_vlan_ol3ol4csum,             0, 0, 1, 1, 1, 1, 0,    6,      \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,    0, 0, 1, 1, 1, 1, 1,    6,      \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
T(ts,                                   0, 1, 0, 0, 0, 0, 0,    8,      \
		TSP_F)                                                  \
T(ts_l3l4csum,                          0, 1, 0, 0, 0, 0, 1,    8,      \
		TSP_F | L3L4CSUM_F)                                     \
T(ts_ol3ol4csum,                        0, 1, 0, 0, 0, 1, 0,    8,      \
		TSP_F | OL3OL4CSUM_F)                                   \
T(ts_ol3ol4csum_l3l4csum,               0, 1, 0, 0, 0, 1, 1,    8,      \
		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
T(ts_vlan,                              0, 1, 0, 0, 1, 0, 0,    8,      \
		TSP_F | VLAN_F)                                         \
T(ts_vlan_l3l4csum,                     0, 1, 0, 0, 1, 0, 1,    8,      \
		TSP_F | VLAN_F | L3L4CSUM_F)                            \
T(ts_vlan_ol3ol4csum,                   0, 1, 0, 0, 1, 1, 0,    8,      \
		TSP_F | VLAN_F | OL3OL4CSUM_F)                          \
T(ts_vlan_ol3ol4csum_l3l4csum,          0, 1, 0, 0, 1, 1, 1,    8,      \
		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(ts_noff,                              0, 1, 0, 1, 0, 0, 0,    8,      \
		TSP_F | NOFF_F)                                         \
T(ts_noff_l3l4csum,                     0, 1, 0, 1, 0, 0, 1,    8,      \
		TSP_F | NOFF_F | L3L4CSUM_F)                            \
T(ts_noff_ol3ol4csum,                   0, 1, 0, 1, 0, 1, 0,    8,      \
		TSP_F | NOFF_F | OL3OL4CSUM_F)                          \
T(ts_noff_ol3ol4csum_l3l4csum,          0, 1, 0, 1, 0, 1, 1,    8,      \
		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(ts_noff_vlan,                         0, 1, 0, 1, 1, 0, 0,    8,      \
		TSP_F | NOFF_F | VLAN_F)                                \
T(ts_noff_vlan_l3l4csum,                0, 1, 0, 1, 1, 0, 1,    8,      \
		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
T(ts_noff_vlan_ol3ol4csum,              0, 1, 0, 1, 1, 1, 0,    8,      \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
T(ts_noff_vlan_ol3ol4csum_l3l4csum,     0, 1, 0, 1, 1, 1, 1,    8,      \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
T(ts_tso,                               0, 1, 1, 0, 0, 0, 0,    8,      \
		TSP_F | TSO_F)                                          \
T(ts_tso_l3l4csum,                      0, 1, 1, 0, 0, 0, 1,    8,      \
		TSP_F | TSO_F | L3L4CSUM_F)                             \
T(ts_tso_ol3ol4csum,                    0, 1, 1, 0, 0, 1, 0,    8,      \
		TSP_F | TSO_F | OL3OL4CSUM_F)                           \
T(ts_tso_ol3ol4csum_l3l4csum,           0, 1, 1, 0, 0, 1, 1,    8,      \
		TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)              \
T(ts_tso_vlan,                          0, 1, 1, 0, 1, 0, 0,    8,      \
		TSP_F | TSO_F | VLAN_F)                                 \
T(ts_tso_vlan_l3l4csum,                 0, 1, 1, 0, 1, 0, 1,    8,      \
		TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                    \
T(ts_tso_vlan_ol3ol4csum,               0, 1, 1, 0, 1, 1, 0,    8,      \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                  \
T(ts_tso_vlan_ol3ol4csum_l3l4csum,      0, 1, 1, 0, 1, 1, 1,    8,      \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
T(ts_tso_noff,                          0, 1, 1, 1, 0, 0, 0,    8,      \
		TSP_F | TSO_F | NOFF_F)                                 \
T(ts_tso_noff_l3l4csum,                 0, 1, 1, 1, 0, 0, 1,    8,      \
		TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                    \
T(ts_tso_noff_ol3ol4csum,               0, 1, 1, 1, 0, 1, 0,    8,      \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                  \
T(ts_tso_noff_ol3ol4csum_l3l4csum,      0, 1, 1, 1, 0, 1, 1,    8,      \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
T(ts_tso_noff_vlan,                     0, 1, 1, 1, 1, 0, 0,    8,      \
		TSP_F | TSO_F | NOFF_F | VLAN_F)                        \
T(ts_tso_noff_vlan_l3l4csum,            0, 1, 1, 1, 1, 0, 1,    8,      \
		TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)           \
T(ts_tso_noff_vlan_ol3ol4csum,          0, 1, 1, 1, 1, 1, 0,    8,      \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)         \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 1,    8,      \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
T(sec,                                  1, 0, 0, 0, 0, 0, 0,    4,      \
		T_SEC_F)                                                \
T(sec_l3l4csum,                         1, 0, 0, 0, 0, 0, 1,    4,      \
		T_SEC_F | L3L4CSUM_F)                                   \
T(sec_ol3ol4csum,                       1, 0, 0, 0, 0, 1, 0,    4,      \
		T_SEC_F | OL3OL4CSUM_F)                                 \
T(sec_ol3ol4csum_l3l4csum,              1, 0, 0, 0, 0, 1, 1,    4,      \
		T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
T(sec_vlan,                             1, 0, 0, 0, 1, 0, 0,    6,      \
		T_SEC_F | VLAN_F)                                       \
T(sec_vlan_l3l4csum,                    1, 0, 0, 0, 1, 0, 1,    6,      \
		T_SEC_F | VLAN_F | L3L4CSUM_F)                          \
T(sec_vlan_ol3ol4csum,                  1, 0, 0, 0, 1, 1, 0,    6,      \
		T_SEC_F | VLAN_F | OL3OL4CSUM_F)                        \
T(sec_vlan_ol3ol4csum_l3l4csum,         1, 0, 0, 0, 1, 1, 1,    6,      \
		T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(sec_noff,                             1, 0, 0, 1, 0, 0, 0,    4,      \
		T_SEC_F | NOFF_F)                                       \
T(sec_noff_l3l4csum,                    1, 0, 0, 1, 0, 0, 1,    4,      \
		T_SEC_F | NOFF_F | L3L4CSUM_F)                          \
T(sec_noff_ol3ol4csum,                  1, 0, 0, 1, 0, 1, 0,    4,      \
		T_SEC_F | NOFF_F | OL3OL4CSUM_F)                        \
T(sec_noff_ol3ol4csum_l3l4csum,         1, 0, 0, 1, 0, 1, 1,    4,      \
		T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
T(sec_noff_vlan,                        1, 0, 0, 1, 1, 0, 0,    6,      \
		T_SEC_F | NOFF_F | VLAN_F)                              \
T(sec_noff_vlan_l3l4csum,               1, 0, 0, 1, 1, 0, 1,    6,      \
		T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)                 \
T(sec_noff_vlan_ol3ol4csum,             1, 0, 0, 1, 1, 1, 0,    6,      \
		T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)               \
T(sec_noff_vlan_ol3ol4csum_l3l4csum,    1, 0, 0, 1, 1, 1, 1,    6,      \
		T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)  \
T(sec_tso,                              1, 0, 1, 0, 0, 0, 0,    6,      \
		T_SEC_F | TSO_F)                                        \
T(sec_tso_l3l4csum,                     1, 0, 1, 0, 0, 0, 1,    6,      \
		T_SEC_F | TSO_F | L3L4CSUM_F)                           \
T(sec_tso_ol3ol4csum,                   1, 0, 1, 0, 0, 1, 0,    6,      \
		T_SEC_F | TSO_F | OL3OL4CSUM_F)                         \
T(sec_tso_ol3ol4csum_l3l4csum,          1, 0, 1, 0, 0, 1, 1,    6,      \
		T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(sec_tso_vlan,                         1, 0, 1, 0, 1, 0, 0,    6,      \
		T_SEC_F | TSO_F | VLAN_F)                               \
T(sec_tso_vlan_l3l4csum,                1, 0, 1, 0, 1, 0, 1,    6,      \
		T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)                  \
T(sec_tso_vlan_ol3ol4csum,              1, 0, 1, 0, 1, 1, 0,    6,      \
		T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                \
T(sec_tso_vlan_ol3ol4csum_l3l4csum,     1, 0, 1, 0, 1, 1, 1,    6,      \
		T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
T(sec_tso_noff,                         1, 0, 1, 1, 0, 0, 0,    6,      \
		T_SEC_F | TSO_F | NOFF_F)                               \
T(sec_tso_noff_l3l4csum,                1, 0, 1, 1, 0, 0, 1,    6,      \
		T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)                  \
T(sec_tso_noff_ol3ol4csum,              1, 0, 1, 1, 0, 1, 0,    6,      \
		T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                \
T(sec_tso_noff_ol3ol4csum_l3l4csum,     1, 0, 1, 1, 0, 1, 1,    6,      \
		T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
T(sec_tso_noff_vlan,                    1, 0, 1, 1, 1, 0, 0,    6,      \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F)                      \
T(sec_tso_noff_vlan_l3l4csum,           1, 0, 1, 1, 1, 0, 1,    6,      \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)         \
T(sec_tso_noff_vlan_ol3ol4csum,         1, 0, 1, 1, 1, 1, 0,    6,      \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)       \
T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 1,   6,      \
		T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
T(sec_ts,                               1, 1, 0, 0, 0, 0, 0,    8,      \
		T_SEC_F | TSP_F)                                        \
T(sec_ts_l3l4csum,                      1, 1, 0, 0, 0, 0, 1,    8,      \
		T_SEC_F | TSP_F | L3L4CSUM_F)                           \
T(sec_ts_ol3ol4csum,                    1, 1, 0, 0, 0, 1, 0,    8,      \
		T_SEC_F | TSP_F | OL3OL4CSUM_F)                         \
T(sec_ts_ol3ol4csum_l3l4csum,           1, 1, 0, 0, 0, 1, 1,    8,      \
		T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(sec_ts_vlan,                          1, 1, 0, 0, 1, 0, 0,    8,      \
		T_SEC_F | TSP_F | VLAN_F)                               \
T(sec_ts_vlan_l3l4csum,                 1, 1, 0, 0, 1, 0, 1,    8,      \
		T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)                  \
T(sec_ts_vlan_ol3ol4csum,               1, 1, 0, 0, 1, 1, 0,    8,      \
		T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)                \
T(sec_ts_vlan_ol3ol4csum_l3l4csum,      1, 1, 0, 0, 1, 1, 1,    8,      \
		T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
T(sec_ts_noff,                          1, 1, 0, 1, 0, 0, 0,    8,      \
		T_SEC_F | TSP_F | NOFF_F)                               \
T(sec_ts_noff_l3l4csum,                 1, 1, 0, 1, 0, 0, 1,    8,      \
		T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)                  \
T(sec_ts_noff_ol3ol4csum,               1, 1, 0, 1, 0, 1, 0,    8,      \
		T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)                \
T(sec_ts_noff_ol3ol4csum_l3l4csum,      1, 1, 0, 1, 0, 1, 1,    8,      \
		T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
T(sec_ts_noff_vlan,                     1, 1, 0, 1, 1, 0, 0,    8,      \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F)                      \
T(sec_ts_noff_vlan_l3l4csum,            1, 1, 0, 1, 1, 0, 1,    8,      \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)         \
T(sec_ts_noff_vlan_ol3ol4csum,          1, 1, 0, 1, 1, 1, 0,    8,      \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)       \
T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 1,    8,      \
		T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
T(sec_ts_tso,                           1, 1, 1, 0, 0, 0, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F)                                \
T(sec_ts_tso_l3l4csum,                  1, 1, 1, 0, 0, 0, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)                   \
T(sec_ts_tso_ol3ol4csum,                1, 1, 1, 0, 0, 1, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)                 \
T(sec_ts_tso_ol3ol4csum_l3l4csum,       1, 1, 1, 0, 0, 1, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
T(sec_ts_tso_vlan,                      1, 1, 1, 0, 1, 0, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | VLAN_F)                       \
T(sec_ts_tso_vlan_l3l4csum,             1, 1, 1, 0, 1, 0, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)          \
T(sec_ts_tso_vlan_ol3ol4csum,           1, 1, 1, 0, 1, 1, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)        \
T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum,  1, 1, 1, 0, 1, 1, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(sec_ts_tso_noff,                      1, 1, 1, 1, 0, 0, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F)                       \
T(sec_ts_tso_noff_l3l4csum,             1, 1, 1, 1, 0, 0, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)          \
T(sec_ts_tso_noff_ol3ol4csum,           1, 1, 1, 1, 0, 1, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)        \
T(sec_ts_tso_noff_ol3ol4csum_l3l4csum,  1, 1, 1, 1, 0, 1, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)\
T(sec_ts_tso_noff_vlan,                 1, 1, 1, 1, 1, 0, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)              \
T(sec_ts_tso_noff_vlan_l3l4csum,        1, 1, 1, 1, 1, 0, 1,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(sec_ts_tso_noff_vlan_ol3ol4csum,      1, 1, 1, 1, 1, 1, 0,    8,      \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)\
T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 1, 8,     \
		T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
		L3L4CSUM_F)

#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                         \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T

#endif /* __CN10K_TX_H__ */