/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)

/* Flags to control xmit_prepare function.
 * Defined from the top bits downwards to denote that they are
 * not used as offload flags to pick the transmit function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update the fc_cache_pkts */            \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply with sqe_per_sqb to express in pkts */     \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check it again for the room */                      \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
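
/* A rough worked example of the flow-control math above, assuming 32 SQEs
 * per SQB (sqes_per_sqb_log2 == 5) and that *fc_mem reports SQBs in use:
 * with nb_sqb_bufs_adj = 100 and *fc_mem = 90, the refreshed cache is
 * (100 - 90) << 5 = 320 packets of room, so bursts totalling up to 320
 * packets may proceed before fc_mem needs to be re-read.
 */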

/* Lookup table mapping number of segments to number of dwords; the value
 * for each nb_segs is encoded as a 4-bit nibble of the magic constant.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
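
/* Reading the nibble table (an illustration derived from the macro): each
 * SG sub-descriptor word covers up to three pointer dwords, and each nibble
 * appears to give the number of 16B units needed for the SG list. E.g.
 * nb_segs = 1 gives (0x76654432210 >> 4) & 0xF = 1 (SG word + 1 pointer,
 * one 16B unit), while nb_segs = 4 gives (0x76654432210 >> 16) & 0xF = 3.
 */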

/* Function to determine the number of Tx sub-descriptors required when the
 * extension sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                       2 :
                       ((flags &
                         (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
                                1 :
                                0);
}
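
/* Summary of the mapping above: TSTAMP needs both the SEND_EXT and SEND_MEM
 * sub-descriptors (2), VLAN/QinQ or TSO needs only SEND_EXT (1), and
 * everything else needs no extension sub-descriptor (0).
 */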

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Already everything is accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                       8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
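
/* Shape of the STEOR data word built above, as used by the submit paths
 * later in this file: the LMT ID is OR'd into the low bits, bits [14:12]
 * hold the number of LMT lines minus one, and each 3-bit field at bits
 * 19, 22, ..., 61 holds the size (in 16B units, minus one) of lines 1..15.
 * The size of line 0 (the low three bits built here) is moved into bits
 * [6:4] of the I/O address before the LMTST is issued.
 */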

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                        (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
                        4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags)
{
        /* Send hdr */
        cmd[0] = txq->send_hdr_w0;
        cmd[1] = 0;
        cmd += 2;

        /* Send ext if present */
        if (flags & NIX_TX_NEED_EXT_HDR) {
                *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
                cmd += 2;
        }

        /* Send sg */
        cmd[0] = txq->sg_w0;
        cmd[1] = 0;
}
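
/* Resulting cmd[] layout from the skeleton above (illustrative): cmd[0..1]
 * hold the SEND_HDR sub-descriptor, cmd[2..3] hold the optional SEND_EXT
 * sub-descriptor when NIX_TX_NEED_EXT_HDR is set, and the SG sub-descriptor
 * follows (cmd[4..5] with the extension header, cmd[2..3] without). The
 * iova slot next to sg_w0 is filled per packet later.
 */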

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update lmtline use for partial end line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e. 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Update lines - 1 that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}
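
/* Size encoding used above, spelled out: with two 64B CPT instructions
 * packed per 128B LMT line, a full line spans 8 16B units
 * (ROC_CN10K_TWO_CPT_INST_DW_M1, presumably 0x7) while a half-filled last
 * line is patched down to 4 units (0x3). E.g. lnum = 2 and loff = 1 means
 * two full lines plus one line holding a single instruction, so bits
 * [14:12] get (2 + 1 - 1) = 2.
 */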

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint64x2_t cmd01, cmd23;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = vgetq_lane_u8(*cmd0, 8);
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }

        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}
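
/* The rlen computation in cn10k_nix_prep_sec_vec() above and
 * cn10k_nix_prep_sec() below is a round-up of the ESP payload, roughly
 * rlen = roundup(dlen_adj + roundup_len, roundup_byte) + partial_len.
 * As a hypothetical example with roundup_byte = 16, roundup_len = 16 and
 * partial_len = 24: dlen_adj = 100 gives rlen = roundup(116, 16) + 24 =
 * 128 + 24 = 152, so dlen_adj becomes 152 - 100 = 52 bytes of expansion.
 */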

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint8_t l2_len;
        uint16_t tag;
        uint64_t sa;

        /* Move to our line from base */
        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                l2_len = cmd[1] & 0xFF;
        else
                l2_len = m->l2_len;

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] =
                (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & PKT_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       PKT_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & PKT_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec)
{
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w1.u = 0;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

                /* In case of no tunnel header, shift the IL3/IL4 fields
                 * into the OL3/OL4 positions so they are used for the
                 * header checksum
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & PKT_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be on the stack. The intent
         * is that 'cmd' stores content from txq->cmd which is copied only
         * once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
                struct nix_send_ext_s *send_hdr_ext =
                        (struct nix_send_ext_s *)(lmt_addr + 16);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
                send_hdr_ext->w0.tstmp = 1;
                if (flags & NIX_TX_MULTI_SEG_F) {
                        /* Retrieving the default desc values */
                        lmt[off] = cmd[2];

                        /* Using compiler barrier to avoid violation of C
                         * aliasing rules.
                         */
                        rte_compiler_barrier();
                }

                /* For packets for which PKT_TX_IEEE1588_TMST is not set, the
                 * Tx tstamp should not be recorded. Hence change the alg type
                 * to NIX_SENDMEMALG_SET and also move the send mem addr field
                 * to the next 8 bytes so it does not corrupt the actual Tx
                 * tstamp registered address.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
        }
}
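
/* Note on the SENDMEMALG handling above: NIX_SENDMEMALG_SETTSTMP makes the
 * hardware record the Tx timestamp at send_mem->addr. When the packet did
 * not request timestamping (is_ol_tstamp == 1), the alg field is, per the
 * comment above, decremented to the plain NIX_SENDMEMALG_SET store and the
 * address is bumped by one 8B word, so the dummy write lands next to the
 * real timestamp slot instead of overwriting it.
 */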

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
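
/* Worked example for the segdw math above (no extension header, so
 * off == 0): a 3-segment mbuf leaves slist at &cmd[6] after two extra
 * pointers, giving 4 SG dwords counted from &cmd[2]. Rounding up yields 2
 * 16B units, plus 1 for the send header, so segdw = 3 and w0.sizem1 = 2.
 */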

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                    uint64_t *cmd, uintptr_t base, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }

        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags);

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
                                              tx_pkts[i]->ol_flags, 4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if (flags & NIX_TX_VWQE_F)
                roc_sso_hws_head_wait(base);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}
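
/* Submission sketch for the loop above: up to 32 packets are staged per
 * iteration, one 128B LMT line each. A burst larger than 16 is flushed as
 * two LMTSTs, the first covering lines 0..15 from lmt_id (the (15ULL << 12)
 * lines-minus-one field) and the second covering the remaining burst - 16
 * lines from lmt_id + 16; a burst of 16 or fewer needs a single STEOR.
 */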

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                         uint16_t pkts, uint64_t *cmd, uintptr_t base,
                         const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint64_t lso_tun_fmt;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        cn10k_nix_tx_skeleton(txq, cmd, flags);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);

                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
                                              tx_pkts[i]->ol_flags, segdw,
                                              flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if (flags & NIX_TX_VWQE_F)
                roc_sso_hws_head_wait(base);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                      const uint64_t flags, const uint64_t lso_tun_fmt)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & PKT_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & PKT_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                        0x1;
                uint8_t shift = is_udp_tun ? 32 : 0;

                shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
                shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format = (lso_tun_fmt >> shift);
        }
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                union nix_send_hdr_w0_u *sh,
                                union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
}

static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                           uint64x2_t *cmd1, const uint8_t segdw,
                           const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return;
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        sh.sizem1 = segdw - 1;
        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}

#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
                               uint64x2_t *cmd1, uint64x2_t *cmd2,
                               uint64x2_t *cmd3, uint8_t *segdw,
                               uint64_t *lmt_addr, __uint128_t *data128,
                               uint8_t *shift, const uint16_t flags)
{
        uint8_t j, off, lmt_used;

        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                /* None of the 4 consecutive packets are multi-segment. */
                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
                        for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                        vst1q_u64(lmt_addr, cmd0[0]);
                        vst1q_u64(lmt_addr + 2, cmd1[0]);
                        vst1q_u64(lmt_addr + 4, cmd0[1]);
                        vst1q_u64(lmt_addr + 6, cmd1[1]);
                        vst1q_u64(lmt_addr + 8, cmd0[2]);
                        vst1q_u64(lmt_addr + 10, cmd1[2]);
                        vst1q_u64(lmt_addr + 12, cmd0[3]);
                        vst1q_u64(lmt_addr + 14, cmd1[3]);

                        *data128 |= ((__uint128_t)7) << *shift;
                        *shift += 3;

                        return 1;
                }
        }

        lmt_used = 0;
        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
                /* Fit consecutive packets in same LMTLINE. */
                if ((segdw[j] + segdw[j + 1]) <= 8) {
                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                /* TSTAMP takes 4 each, no segs. */
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                vst1q_u64(lmt_addr + 6, cmd3[j]);

                                vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
                                vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                /* EXT header takes 3 each, space for 2 segs. */
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                off = segdw[j] - 3;
                                off <<= 1;
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
                                                           lmt_addr + 12 + off,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
                        } else {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 4,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                off = segdw[j] - 2;
                                off <<= 1;
                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
                                                           lmt_addr + 8 + off,
                                                           &cmd0[j + 1],
                                                           &cmd1[j + 1],
                                                           segdw[j + 1], flags);
                                vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
                        }
                        *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
                                    << *shift;
                        *shift += 3;
                        j += 2;
                } else {
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                off = segdw[j] - 4;
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 6,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                        } else {
                                cn10k_nix_prepare_mseg_vec(mbufs[j],
                                                           lmt_addr + 4,
                                                           &cmd0[j], &cmd1[j],
                                                           segdw[j], flags);
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                        }
                        *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
                        *shift += 3;
                        j++;
                }
                lmt_used++;
                lmt_addr += 16;
        }

        return lmt_used;
}
1390
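/* LMT accounting note: an LMT line is 128 bytes, i.e. eight 16-byte dwords.
 * cn10k_nix_lmt_next() below rolls over to a fresh line when the current one
 * cannot fit 'dw' more dwords, recording (dwords used - 1) of the finished
 * line as a 3-bit field in data128 starting at bit 16. E.g. with loff = 112
 * and dw = 2, 112 + 32 > 128, so the finished line (7 dwords used) is
 * encoded as 6 and lnum advances.
 */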
1391 static __rte_always_inline void
1392 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1393                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1394 {
1395         /* Go to next line if we are out of space */
1396         if ((*loff + (dw << 4)) > 128) {
1397                 *data128 = *data128 |
1398                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1399                 *shift = *shift + 3;
1400                 *loff = 0;
1401                 *lnum = *lnum + 1;
1402         }
1403
1404         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1405         *loff = *loff + (dw << 4);
1406 }
1407
1408 static __rte_always_inline void
1409 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1410                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1411                      uint64x2_t cmd3, const uint16_t flags)
1412 {
1413         uint8_t off;
1414
1415         /* Handle no-fast-free when security is enabled and multi-seg is not */
1416         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1417             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1418             !(flags & NIX_TX_MULTI_SEG_F)) {
1419                 union nix_send_sg_s sg;
1420
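                /* Assumption: bit 55 of SEND_SG W0 is the I1 "don't free"
                 * hint for the first segment, so a non-zero prefree result
                 * below stops NIX from returning the buffer to its aura.
                 */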
1421                 sg.u = vgetq_lane_u64(cmd1, 0);
1422                 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1423                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1424
1425 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1426                 sg.u = vgetq_lane_u64(cmd1, 0);
1427                 if (!(sg.u & (1ULL << 55)))
1428                         RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1429                                                 0);
1430                 rte_io_wmb();
1431 #endif
1432         }
1433         if (flags & NIX_TX_MULTI_SEG_F) {
1434                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1435                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1436                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1437                                                    &cmd0, &cmd1, segdw, flags);
1438                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1439                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1440                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1441                         off = segdw - 4;
1442                         off <<= 4;
1443                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1444                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1445                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1446                                                    &cmd0, &cmd1, segdw, flags);
1447                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1448                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1449                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1450                 } else {
1451                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1452                                                    &cmd0, &cmd1, segdw, flags);
1453                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1454                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1455                 }
1456         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1457                 /* Store the prepared send desc to LMT lines */
1458                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1459                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1460                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1461                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1462                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1463                 } else {
1464                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1465                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1466                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1467                 }
1468         } else {
1469                 /* Store the prepared send desc to LMT lines */
1470                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1471                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1472         }
1473 }
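/* Note: LMT_OFF(laddr, line, byte) is assumed to resolve to byte offset
 * 'byte' within the given 128-byte LMT line, so the 0/16/32/48 offsets used
 * above are simply the first four 16-byte dword slots of line 0.
 */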
1474
1475 static __rte_always_inline uint16_t
1476 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1477                            uint16_t pkts, uint64_t *cmd, uintptr_t base,
1478                            const uint16_t flags)
1479 {
1480         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1481         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1482         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1483                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1484         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1485         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1486         uint64x2_t senddesc01_w0, senddesc23_w0;
1487         uint64x2_t senddesc01_w1, senddesc23_w1;
1488         uint64x2_t sendext01_w0, sendext23_w0;
1489         uint64x2_t sendext01_w1, sendext23_w1;
1490         uint64x2_t sendmem01_w0, sendmem23_w0;
1491         uint64x2_t sendmem01_w1, sendmem23_w1;
1492         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1493         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1494         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1495         struct cn10k_eth_txq *txq = tx_queue;
1496         rte_iova_t io_addr = txq->io_addr;
1497         uintptr_t laddr = txq->lmt_base;
1498         uint8_t c_lnum, c_shft, c_loff;
1499         uint64x2_t ltypes01, ltypes23;
1500         uint64x2_t xtmp128, ytmp128;
1501         uint64x2_t xmask01, xmask23;
1502         uintptr_t c_laddr = laddr;
1503         uint8_t lnum, shift, loff;
1504         rte_iova_t c_io_addr;
1505         uint64_t sa_base;
1506         union wdata {
1507                 __uint128_t data128;
1508                 uint64_t data[2];
1509         } wd;
1510
1511         if (!(flags & NIX_TX_VWQE_F)) {
1512                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1513                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1514                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1515                 /* Reduce the cached count */
1516                 txq->fc_cache_pkts -= pkts;
1517         } else {
1518                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1519                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1520         }
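        /* E.g. with NIX_DESCS_PER_LOOP == 4 and pkts == 23, the vector loop
         * below covers 20 packets and the remaining scalar == 3 are handed
         * to the scalar xmit path once the vector burst completes.
         */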
1521
1522         /* Perform header writes before barrier for TSO */
1523         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1524                 for (i = 0; i < pkts; i++)
1525                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1526         }
1527
1528         senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1529         senddesc23_w0 = senddesc01_w0;
1530         senddesc01_w1 = vdupq_n_u64(0);
1531         senddesc23_w1 = senddesc01_w1;
1532         sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
1533         sgdesc23_w0 = sgdesc01_w0;
1534
1535         /* Load command defaults into vector variables. */
1536         if (flags & NIX_TX_NEED_EXT_HDR) {
1537                 sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
1538                 sendext23_w0 = sendext01_w0;
1539                 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1540                 sendext23_w1 = sendext01_w1;
1541                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1542                         sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
1543                         sendmem23_w0 = sendmem01_w0;
1544                         sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
1545                         sendmem23_w1 = sendmem01_w1;
1546                 }
1547         }
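        /* The 12 | 12U << 24 default above is assumed to preset the VLAN0
         * and VLAN1 insert pointers to byte offset 12, i.e. right after the
         * 6 + 6 byte DMAC/SMAC where a tag gets inserted.
         */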
1548
1549         /* Get LMT base address and LMT ID as lcore id */
1550         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1551         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1552                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1553                 c_io_addr = txq->cpt_io_addr;
1554                 sa_base = txq->sa_base;
1555         }
1556
1557         left = pkts;
1558 again:
1559         /* Number of packets to prepare depends on offloads enabled. */
1560         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1561                               cn10k_nix_pkts_per_vec_brst(flags) :
1562                               left;
1563         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1564                 wd.data128 = 0;
1565                 shift = 16;
1566         }
1567         lnum = 0;
1568         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1569                 loff = 0;
1570                 c_loff = 0;
1571                 c_lnum = 0;
1572                 c_shft = 16;
1573         }
1574
1575         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1576                 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1577                         burst = i;
1578                         break;
1579                 }
1580
1581                 if (flags & NIX_TX_MULTI_SEG_F) {
1582                         uint8_t j;
1583
1584                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1585                                 struct rte_mbuf *m = tx_pkts[j];
1586
1587                                 /* Get dwords based on nb_segs. */
1588                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1589                                 /* Add dwords based on offloads. */
1590                                 segdw[j] += 1 + /* SEND HDR */
1591                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1592                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1593                         }
1594
1595                         /* Check if there are enough LMTLINES for this loop */
1596                         if (lnum + 4 > 32) {
1597                                 uint8_t ldwords_con = 0, lneeded = 0;
1598                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1599                                         ldwords_con += segdw[j];
1600                                         if (ldwords_con > 8) {
1601                                                 lneeded += 1;
1602                                                 ldwords_con = segdw[j];
1603                                         }
1604                                 }
1605                                 lneeded += 1;
1606                                 if (lnum + lneeded > 32) {
1607                                         burst = i;
1608                                         break;
1609                                 }
1610                         }
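                        /* Worked example for the check above: segdw =
                         * {5, 5, 5, 5} fits only one descriptor per 8-dword
                         * line, so lneeded evaluates to 4 and the burst is
                         * trimmed when lnum + 4 would exceed the 32 lines.
                         */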
1611                 }
1612                 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1613                 senddesc01_w0 =
1614                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1615                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1616
1617                 senddesc23_w0 = senddesc01_w0;
1618                 sgdesc23_w0 = sgdesc01_w0;
1619
1620                 /* Clear vlan enables. */
1621                 if (flags & NIX_TX_NEED_EXT_HDR) {
1622                         sendext01_w1 = vbicq_u64(sendext01_w1,
1623                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1624                         sendext23_w1 = sendext01_w1;
1625                 }
1626
1627                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1628                         /* Reset send mem alg from SUB back to SETTSTMP */
1629                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1630                                                  vdupq_n_u64(BIT_ULL(59)));
1631                         /* Reset send mem address to default. */
1632                         sendmem01_w1 =
1633                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1634                         sendmem23_w0 = sendmem01_w0;
1635                         sendmem23_w1 = sendmem01_w1;
1636                 }
1637
1638                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1639                         /* Clear the LSO enable bit. */
1640                         sendext01_w0 = vbicq_u64(sendext01_w0,
1641                                                  vdupq_n_u64(BIT_ULL(14)));
1642                         sendext23_w0 = sendext01_w0;
1643                 }
1644
1645                 /* Move mbufs to iova */
1646                 mbuf0 = (uint64_t *)tx_pkts[0];
1647                 mbuf1 = (uint64_t *)tx_pkts[1];
1648                 mbuf2 = (uint64_t *)tx_pkts[2];
1649                 mbuf3 = (uint64_t *)tx_pkts[3];
1650
1651                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1652                                      offsetof(struct rte_mbuf, buf_iova));
1653                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1654                                      offsetof(struct rte_mbuf, buf_iova));
1655                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1656                                      offsetof(struct rte_mbuf, buf_iova));
1657                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1658                                      offsetof(struct rte_mbuf, buf_iova));
1659                 /*
1660                  * Get each mbuf's ol_flags, iova, pkt_len and data_off:
1661                  * dataoff_iovaX.D[0] = iova,
1662                  * dataoff_iovaX.D[1](15:0) = mbuf->data_off
1663                  * len_olflagsX.D[0] = ol_flags,
1664                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1665                  */
1666                 dataoff_iova0 = vld1q_u64(mbuf0);
1667                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1668                 dataoff_iova1 = vld1q_u64(mbuf1);
1669                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1670                 dataoff_iova2 = vld1q_u64(mbuf2);
1671                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1672                 dataoff_iova3 = vld1q_u64(mbuf3);
1673                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1674
1675                 /* Move mbufs to point to pool */
1676                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1677                                      offsetof(struct rte_mbuf, pool) -
1678                                      offsetof(struct rte_mbuf, buf_iova));
1679                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1680                                      offsetof(struct rte_mbuf, pool) -
1681                                      offsetof(struct rte_mbuf, buf_iova));
1682                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1683                                      offsetof(struct rte_mbuf, pool) -
1684                                      offsetof(struct rte_mbuf, buf_iova));
1685                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1686                                      offsetof(struct rte_mbuf, pool) -
1687                                      offsetof(struct rte_mbuf, buf_iova));
1688
1689                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1690                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1691                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1692                         /*
1693                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1694                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1695                          */
1696
1697                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1698                                      : [a] "+w"(senddesc01_w1)
1699                                      : [in] "r"(mbuf0 + 2)
1700                                      : "memory");
1701
1702                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1703                                      : [a] "+w"(senddesc01_w1)
1704                                      : [in] "r"(mbuf1 + 2)
1705                                      : "memory");
1706
1707                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1708                                      : [b] "+w"(senddesc23_w1)
1709                                      : [in] "r"(mbuf2 + 2)
1710                                      : "memory");
1711
1712                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1713                                      : [b] "+w"(senddesc23_w1)
1714                                      : [in] "r"(mbuf3 + 2)
1715                                      : "memory");
1716
1717                         /* Get pool pointer alone */
1718                         mbuf0 = (uint64_t *)*mbuf0;
1719                         mbuf1 = (uint64_t *)*mbuf1;
1720                         mbuf2 = (uint64_t *)*mbuf2;
1721                         mbuf3 = (uint64_t *)*mbuf3;
1722                 } else {
1723                         /* Get pool pointer alone */
1724                         mbuf0 = (uint64_t *)*mbuf0;
1725                         mbuf1 = (uint64_t *)*mbuf1;
1726                         mbuf2 = (uint64_t *)*mbuf2;
1727                         mbuf3 = (uint64_t *)*mbuf3;
1728                 }
1729
1730                 const uint8x16_t shuf_mask2 = {
1731                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1732                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1733                 };
1734                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1735                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1736
1737                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1738                 const uint64x2_t and_mask0 = {
1739                         0xFFFFFFFFFFFFFFFF,
1740                         0x000000000000FFFF,
1741                 };
1742
1743                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1744                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1745                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1746                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1747
1748                 /*
1749                  * Pick only 16 bits of pktlen preset at bits 63:32
1750                  * and place them at bits 15:0.
1751                  */
1752                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1753                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1754
1755                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1756                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1757                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1758
1759                 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1760                  * pktlen at 15:0 position.
1761                  */
1762                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1763                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1764                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1765                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1766
1767                 /* Move mbuf to point to pool_id. */
1768                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1769                                      offsetof(struct rte_mempool, pool_id));
1770                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1771                                      offsetof(struct rte_mempool, pool_id));
1772                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1773                                      offsetof(struct rte_mempool, pool_id));
1774                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1775                                      offsetof(struct rte_mempool, pool_id));
1776
1777                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1778                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1779                         /*
1780                          * Lookup table to translate ol_flags to
1781                          * il3/il4 types. But we still use ol3/ol4 types in
1782                          * senddesc_w1 as only one header processing is enabled.
1783                          */
1784                         const uint8x16_t tbl = {
1785                                 /* [0-15] = il4type:il3type */
1786                                 0x04, /* none (IPv6 assumed) */
1787                                 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
1788                                 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
1789                                 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
1790                                 0x03, /* PKT_TX_IP_CKSUM */
1791                                 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
1792                                 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
1793                                 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
1794                                 0x02, /* PKT_TX_IPV4  */
1795                                 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
1796                                 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
1797                                 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
1798                                 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
1799                                 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1800                                        * PKT_TX_TCP_CKSUM
1801                                        */
1802                                 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1803                                        * PKT_TX_SCTP_CKSUM
1804                                        */
1805                                 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1806                                        * PKT_TX_UDP_CKSUM
1807                                        */
1808                         };
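                        /* Example: ol_flags = PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                         * PKT_TX_TCP_CKSUM forms index 13, yielding 0x13:
                         * il4type = 1 (TCP csum) and il3type = 3 (IPv4 with
                         * csum), per the encoding above.
                         */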
1809
1810                         /* Extract olflags to translate to iltypes */
1811                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1812                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1813
1814                         /*
1815                          * E(47):L3_LEN(9):L2_LEN(7+z)
1816                          * E(47):L3_LEN(9):L2_LEN(7+z)
1817                          */
1818                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1819                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1820
1821                         /* Move OLFLAGS bits 55:52 to 51:48
1822                          * with zeros prepended on the byte; the rest are
1823                          * don't care
1824                          */
1825                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1826                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1827                         /*
1828                          * E(48):L3_LEN(8):L2_LEN(z+7)
1829                          * E(48):L3_LEN(8):L2_LEN(z+7)
1830                          */
1831                         const int8x16_t tshft3 = {
1832                                 -1, 0, 8, 8, 8, 8, 8, 8,
1833                                 -1, 0, 8, 8, 8, 8, 8, 8,
1834                         };
1835
1836                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1837                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1838
1839                         /* Do the lookup */
1840                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1841                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1842
1843                         /* Pick only relevant fields, i.e., bits 48:55 of iltype
1844                          * and place it in ol3/ol4type of senddesc_w1
1845                          */
1846                         const uint8x16_t shuf_mask0 = {
1847                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1848                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1849                         };
1850
1851                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1852                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1853
1854                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1855                          * a [E(32):E(16):OL3(8):OL2(8)]
1856                          * a = a + (a << 8)
1857                          * a [E(32):E(16):(OL3+OL2):OL2]
1858                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1859                          */
1860                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1861                                                  vshlq_n_u16(senddesc01_w1, 8));
1862                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1863                                                  vshlq_n_u16(senddesc23_w1, 8));
1864
1865                         /* Move ltypes to senddesc*_w1 */
1866                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1867                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1868                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1869                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1870                         /*
1871                          * Lookup table to translate ol_flags to
1872                          * ol3/ol4 types.
1873                          */
1874
1875                         const uint8x16_t tbl = {
1876                                 /* [0-15] = ol4type:ol3type */
1877                                 0x00, /* none */
1878                                 0x03, /* OUTER_IP_CKSUM */
1879                                 0x02, /* OUTER_IPV4 */
1880                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1881                                 0x04, /* OUTER_IPV6 */
1882                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1883                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1884                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1885                                        * OUTER_IP_CKSUM
1886                                        */
1887                                 0x00, /* OUTER_UDP_CKSUM */
1888                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1889                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1890                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1891                                        * OUTER_IP_CKSUM
1892                                        */
1893                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1894                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1895                                        * OUTER_IP_CKSUM
1896                                        */
1897                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1898                                        * OUTER_IPV4
1899                                        */
1900                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1901                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1902                                        */
1903                         };
1904
1905                         /* Extract olflags to translate to iltypes */
1906                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1907                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1908
1909                         /*
1910                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1911                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1912                          */
1913                         const uint8x16_t shuf_mask5 = {
1914                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1915                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1916                         };
1917                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1918                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1919
1920                         /* Extract outer ol flags only */
1921                         const uint64x2_t o_cksum_mask = {
1922                                 0x1C00020000000000,
1923                                 0x1C00020000000000,
1924                         };
1925
1926                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1927                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1928
1929                         /* Extract OUTER_UDP_CKSUM bit 41 and
1930                          * move it to bit 61
1931                          */
1932
1933                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1934                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1935
1936                         /* Shift oltype by 2 to start nibble from BIT(56)
1937                          * instead of BIT(58)
1938                          */
1939                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1940                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1941                         /*
1942                          * E(48):L3_LEN(8):L2_LEN(z+7)
1943                          * E(48):L3_LEN(8):L2_LEN(z+7)
1944                          */
1945                         const int8x16_t tshft3 = {
1946                                 -1, 0, 8, 8, 8, 8, 8, 8,
1947                                 -1, 0, 8, 8, 8, 8, 8, 8,
1948                         };
1949
1950                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1951                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1952
1953                         /* Do the lookup */
1954                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1955                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1956
1957                         /* Pick only relevant fields, i.e., bits 56:63 of oltype
1958                          * and place it in ol3/ol4type of senddesc_w1
1959                          */
1960                         const uint8x16_t shuf_mask0 = {
1961                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1962                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1963                         };
1964
1965                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1966                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1967
1968                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1969                          * a [E(32):E(16):OL3(8):OL2(8)]
1970                          * a = a + (a << 8)
1971                          * a [E(32):E(16):(OL3+OL2):OL2]
1972                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1973                          */
1974                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1975                                                  vshlq_n_u16(senddesc01_w1, 8));
1976                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1977                                                  vshlq_n_u16(senddesc23_w1, 8));
1978
1979                         /* Move ltypes to senddesc*_w1 */
1980                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1981                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1982                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1983                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1984                         /* Lookup table to translate ol_flags to
1985                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1986                          */
1987                         const uint8x16x2_t tbl = {{
1988                                 {
1989                                         /* [0-15] = il4type:il3type */
1990                                         0x04, /* none (IPv6) */
1991                                         0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1992                                         0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1993                                         0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1994                                         0x03, /* PKT_TX_IP_CKSUM */
1995                                         0x13, /* PKT_TX_IP_CKSUM |
1996                                                * PKT_TX_TCP_CKSUM
1997                                                */
1998                                         0x23, /* PKT_TX_IP_CKSUM |
1999                                                * PKT_TX_SCTP_CKSUM
2000                                                */
2001                                         0x33, /* PKT_TX_IP_CKSUM |
2002                                                * PKT_TX_UDP_CKSUM
2003                                                */
2004                                         0x02, /* PKT_TX_IPV4 */
2005                                         0x12, /* PKT_TX_IPV4 |
2006                                                * PKT_TX_TCP_CKSUM
2007                                                */
2008                                         0x22, /* PKT_TX_IPV4 |
2009                                                * PKT_TX_SCTP_CKSUM
2010                                                */
2011                                         0x32, /* PKT_TX_IPV4 |
2012                                                * PKT_TX_UDP_CKSUM
2013                                                */
2014                                         0x03, /* PKT_TX_IPV4 |
2015                                                * PKT_TX_IP_CKSUM
2016                                                */
2017                                         0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2018                                                * PKT_TX_TCP_CKSUM
2019                                                */
2020                                         0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2021                                                * PKT_TX_SCTP_CKSUM
2022                                                */
2023                                         0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
2024                                                * PKT_TX_UDP_CKSUM
2025                                                */
2026                                 },
2027
2028                                 {
2029                                         /* [16-31] = ol4type:ol3type */
2030                                         0x00, /* none */
2031                                         0x03, /* OUTER_IP_CKSUM */
2032                                         0x02, /* OUTER_IPV4 */
2033                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2034                                         0x04, /* OUTER_IPV6 */
2035                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2036                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2037                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2038                                                * OUTER_IP_CKSUM
2039                                                */
2040                                         0x00, /* OUTER_UDP_CKSUM */
2041                                         0x33, /* OUTER_UDP_CKSUM |
2042                                                * OUTER_IP_CKSUM
2043                                                */
2044                                         0x32, /* OUTER_UDP_CKSUM |
2045                                                * OUTER_IPV4
2046                                                */
2047                                         0x33, /* OUTER_UDP_CKSUM |
2048                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2049                                                */
2050                                         0x34, /* OUTER_UDP_CKSUM |
2051                                                * OUTER_IPV6
2052                                                */
2053                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2054                                                * OUTER_IP_CKSUM
2055                                                */
2056                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2057                                                * OUTER_IPV4
2058                                                */
2059                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2060                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2061                                                */
2062                                 },
2063                         }};
2064
2065                         /* Extract olflags to translate to oltype & iltype */
2066                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2067                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2068
2069                         /*
2070                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2071                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2072                          */
2073                         const uint32x4_t tshft_4 = {
2074                                 1,
2075                                 0,
2076                                 1,
2077                                 0,
2078                         };
2079                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2080                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2081
2082                         /*
2083                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2084                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2085                          */
2086                         const uint8x16_t shuf_mask5 = {
2087                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2088                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2089                         };
2090                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2091                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2092
2093                         /* Extract outer and inner header ol_flags */
2094                         const uint64x2_t oi_cksum_mask = {
2095                                 0x1CF0020000000000,
2096                                 0x1CF0020000000000,
2097                         };
2098
2099                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2100                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2101
2102                         /* Extract OUTER_UDP_CKSUM bit 41 and
2103                          * move it to bit 61
2104                          */
2105
2106                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2107                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2108
2109                         /* Shift right oltype by 2 and iltype by 4
2110                          * to start oltype nibble from BIT(58)
2111                          * instead of BIT(56) and iltype nibble from BIT(48)
2112                          * instead of BIT(52).
2113                          */
2114                         const int8x16_t tshft5 = {
2115                                 8, 8, 8, 8, 8, 8, -4, -2,
2116                                 8, 8, 8, 8, 8, 8, -4, -2,
2117                         };
2118
2119                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2120                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2121                         /*
2122                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2123                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2124                          */
2125                         const int8x16_t tshft3 = {
2126                                 -1, 0, -1, 0, 0, 0, 0, 0,
2127                                 -1, 0, -1, 0, 0, 0, 0, 0,
2128                         };
2129
2130                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2131                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2132
2133                         /* Mark Bit(4) of oltype */
2134                         const uint64x2_t oi_cksum_mask2 = {
2135                                 0x1000000000000000,
2136                                 0x1000000000000000,
2137                         };
2138
2139                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2140                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
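                        /* Bit 60 set above becomes bit 4 of the oltype index
                         * byte, steering vqtbl2q_u8() into the second table
                         * ([16-31]) for oltype while the iltype byte still
                         * indexes the first ([0-15]).
                         */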
2141
2142                         /* Do the lookup */
2143                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2144                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2145
2146                         /* Pick only relevant fields, i.e., bits 48:55 of iltype and
2147                          * Bit 56:63 of oltype and place it in corresponding
2148                          * place in senddesc_w1.
2149                          */
2150                         const uint8x16_t shuf_mask0 = {
2151                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2152                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2153                         };
2154
2155                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2156                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2157
2158                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2159                          * l3len, l2len, ol3len, ol2len.
2160                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2161                          * a = a + (a << 8)
2162                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2163                          * a = a + (a << 16)
2164                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2165                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2166                          */
2167                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2168                                                  vshlq_n_u32(senddesc01_w1, 8));
2169                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2170                                                  vshlq_n_u32(senddesc23_w1, 8));
2171
2172                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2173                         senddesc01_w1 = vaddq_u8(
2174                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2175                         senddesc23_w1 = vaddq_u8(
2176                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2177
2178                         /* Move ltypes to senddesc*_w1 */
2179                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2180                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2181                 }
2182
2183                 xmask01 = vdupq_n_u64(0);
2184                 xmask23 = xmask01;
2185                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2186                              : [a] "+w"(xmask01)
2187                              : [in] "r"(mbuf0)
2188                              : "memory");
2189
2190                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2191                              : [a] "+w"(xmask01)
2192                              : [in] "r"(mbuf1)
2193                              : "memory");
2194
2195                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2196                              : [b] "+w"(xmask23)
2197                              : [in] "r"(mbuf2)
2198                              : "memory");
2199
2200                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2201                              : [b] "+w"(xmask23)
2202                              : [in] "r"(mbuf3)
2203                              : "memory");
2204                 xmask01 = vshlq_n_u64(xmask01, 20);
2205                 xmask23 = vshlq_n_u64(xmask23, 20);
2206
2207                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2208                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
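        /* The LD1 {.H} loads above fetch the low 16 bits of each mbuf's
         * mempool pool_id (aura handle); shifted left by 20 they are assumed
         * to land in the SEND_HDR_W0 aura field so NIX can free buffers back
         * to the right pool.
         */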
2209
2210                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2211                         /* Tx ol_flag for VLAN. */
2212                         const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
2213                         /* Bit enable for VLAN1 */
2214                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2215                         /* Tx ol_flag for QinQ. */
2216                         const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
2217                         /* Bit enable for VLAN0 */
2218                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2219                         /* Load VLAN values from packet; outer is VLAN0 */
2220                         uint64x2_t ext01 = {
2221                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2222                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2223                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2224                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2225                         };
2226                         uint64x2_t ext23 = {
2227                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2228                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2229                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2230                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2231                         };
2232
2233                         /* Get ol_flags of the packets. */
2234                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2235                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2236
2237                         /* ORR vlan outer/inner values into cmd. */
2238                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2239                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2240
2241                         /* Test for offload enable bits and generate masks. */
2242                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2243                                                       mlv),
2244                                             vandq_u64(vtstq_u64(xtmp128, olq),
2245                                                       mlq));
2246                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2247                                                       mlv),
2248                                             vandq_u64(vtstq_u64(ytmp128, olq),
2249                                                       mlq));
2250
2251                         /* Set vlan enable bits into cmd based on mask. */
2252                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2253                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2254                 }
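                /* Layout recap: VLAN0 (outer) TCI sits at bits 8:23 with its
                 * insert enable at bit 48, VLAN1 (inner) TCI at bits 32:47
                 * with enable at bit 49; the vtstq/vandq sequence turns
                 * per-packet ol_flags into exactly those enable bits.
                 */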
2255
2256                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2257                         /* Tx ol_flag for timestamp. */
2258                         const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
2259                                                 PKT_TX_IEEE1588_TMST};
2260                         /* Set send mem alg to SUB. */
2261                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2262                         /* Increment send mem address by 8. */
2263                         const uint64x2_t addr = {0x8, 0x8};
2264
2265                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2266                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2267
2268                         /* Check if timestamp is requested and generate an
2269                          * inverted mask, since the default cmd value needs
2270                          * no change for lanes that request it.
2271                          */
2272                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2273                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2274
2275                         /* Change send mem address to an 8 byte offset when
2276                          * TSTMP is disabled.
2277                          */
2278                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2279                                                  vandq_u64(xtmp128, addr));
2280                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2281                                                  vandq_u64(ytmp128, addr));
2282                         /* Change send mem alg to SUB when TSTMP is disabled. */
2283                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2284                                                  vandq_u64(xtmp128, alg));
2285                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2286                                                  vandq_u64(ytmp128, alg));
2287
2288                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2289                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2290                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2291                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2292                 }
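                /* Recap: the default SEND_MEM command records a timestamp
                 * (SETTSTMP); for lanes without PKT_TX_IEEE1588_TMST the
                 * inverted mask flips the alg to SUB and offsets the address
                 * by 8, diverting the write away from the live timestamp
                 * slot.
                 */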
2293
2294                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2295                         const uint64_t lso_fmt = txq->lso_tun_fmt;
2296                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2297                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2298
2299                         /* Extract SD W1 as we need to set L4 types. */
2300                         vst1q_u64(sd_w1, senddesc01_w1);
2301                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
2302
2303                         /* Extract SX W0 as we need to set LSO fields. */
2304                         vst1q_u64(sx_w0, sendext01_w0);
2305                         vst1q_u64(sx_w0 + 2, sendext23_w0);
2306
2307                         /* Extract ol_flags. */
2308                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2309                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2310
2311                         /* Prepare individual mbufs. */
2312                         cn10k_nix_prepare_tso(tx_pkts[0],
2313                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
2314                                 (union nix_send_ext_w0_u *)&sx_w0[0],
2315                                 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2316
2317                         cn10k_nix_prepare_tso(tx_pkts[1],
2318                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
2319                                 (union nix_send_ext_w0_u *)&sx_w0[1],
2320                                 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2321
2322                         cn10k_nix_prepare_tso(tx_pkts[2],
2323                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
2324                                 (union nix_send_ext_w0_u *)&sx_w0[2],
2325                                 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2326
2327                         cn10k_nix_prepare_tso(tx_pkts[3],
2328                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
2329                                 (union nix_send_ext_w0_u *)&sx_w0[3],
2330                                 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2331
2332                         senddesc01_w1 = vld1q_u64(sd_w1);
2333                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2334
2335                         sendext01_w0 = vld1q_u64(sx_w0);
2336                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
2337                 }
2338
2339                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2340                     !(flags & NIX_TX_MULTI_SEG_F) &&
2341                     !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2342                         /* Set don't free bit if reference count > 1 */
2343                         xmask01 = vdupq_n_u64(0);
2344                         xmask23 = xmask01;
2345
2346                         /* Move mbufs to iova */
2347                         mbuf0 = (uint64_t *)tx_pkts[0];
2348                         mbuf1 = (uint64_t *)tx_pkts[1];
2349                         mbuf2 = (uint64_t *)tx_pkts[2];
2350                         mbuf3 = (uint64_t *)tx_pkts[3];
2351
2352                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2353                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2354                         else
2355                                 RTE_MEMPOOL_CHECK_COOKIES(
2356                                         ((struct rte_mbuf *)mbuf0)->pool,
2357                                         (void **)&mbuf0, 1, 0);
2358
2359                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2360                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2361                         else
2362                                 RTE_MEMPOOL_CHECK_COOKIES(
2363                                         ((struct rte_mbuf *)mbuf1)->pool,
2364                                         (void **)&mbuf1, 1, 0);
2365
2366                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2367                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2368                         else
2369                                 RTE_MEMPOOL_CHECK_COOKIES(
2370                                         ((struct rte_mbuf *)mbuf2)->pool,
2371                                         (void **)&mbuf2, 1, 0);
2372
2373                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2374                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2375                         else
2376                                 RTE_MEMPOOL_CHECK_COOKIES(
2377                                         ((struct rte_mbuf *)mbuf3)->pool,
2378                                         (void **)&mbuf3, 1, 0);
2379                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2380                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2381                 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2382                            !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2383                         /* Move mbufs to iova */
2384                         mbuf0 = (uint64_t *)tx_pkts[0];
2385                         mbuf1 = (uint64_t *)tx_pkts[1];
2386                         mbuf2 = (uint64_t *)tx_pkts[2];
2387                         mbuf3 = (uint64_t *)tx_pkts[3];
2388
2389                         /* Mark mempool object as "put" since
2390                          * it is freed by NIX
2391                          */
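                             /* RTE_MEMPOOL_CHECK_COOKIES is a no-op unless
                              * RTE_LIBRTE_MEMPOOL_DEBUG is enabled; it only
                              * updates debug cookies, the free itself is done
                              * by hardware.
                              */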
2392                         RTE_MEMPOOL_CHECK_COOKIES(
2393                                 ((struct rte_mbuf *)mbuf0)->pool,
2394                                 (void **)&mbuf0, 1, 0);
2395
2396                         RTE_MEMPOOL_CHECK_COOKIES(
2397                                 ((struct rte_mbuf *)mbuf1)->pool,
2398                                 (void **)&mbuf1, 1, 0);
2399
2400                         RTE_MEMPOOL_CHECK_COOKIES(
2401                                 ((struct rte_mbuf *)mbuf2)->pool,
2402                                 (void **)&mbuf2, 1, 0);
2403
2404                         RTE_MEMPOOL_CHECK_COOKIES(
2405                                 ((struct rte_mbuf *)mbuf3)->pool,
2406                                 (void **)&mbuf3, 1, 0);
2407                 }
2408
2409                 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
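                 /* vzip1q_u64(a, b) = {a[0], b[0]} and vzip2q_u64(a, b) =
                  * {a[1], b[1]}: zipping the w0/w1 vectors turns the
                  * field-major layout into one 16B descriptor per mbuf,
                  * e.g. cmd0[0] = {SEND_HDR_W0(mbuf0), SEND_HDR_W1(mbuf0)}.
                  */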
2410                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2411                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2412                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2413                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2414
2415                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2416                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2417                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2418                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2419
2420                 if (flags & NIX_TX_NEED_EXT_HDR) {
2421                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2422                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2423                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2424                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2425                 }
2426
2427                 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2428                         const uint64x2_t olf = {PKT_TX_SEC_OFFLOAD,
2429                                                 PKT_TX_SEC_OFFLOAD};
2430                         uintptr_t next;
2431                         uint8_t dw;
2432
2433                         /* Extract ol_flags. */
2434                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2435                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2436
2437                         xtmp128 = vtstq_u64(olf, xtmp128);
2438                         ytmp128 = vtstq_u64(olf, ytmp128);
2439
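                             /* vtstq_u64() leaves a lane all-ones when that
                              * packet set PKT_TX_SEC_OFFLOAD. Tagged packets
                              * are built into the CPT LMT region
                              * (c_laddr/c_lnum/c_loff) by
                              * cn10k_nix_prep_sec_vec(); the rest take a slot
                              * in the NIX LMT region via cn10k_nix_lmt_next().
                              * Either way 'next' is the address the command
                              * words get stored to.
                              */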
2440                         /* Process mbuf0 */
2441                         dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2442                         if (vgetq_lane_u64(xtmp128, 0))
2443                                 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2444                                                        &cmd1[0], &next, c_laddr,
2445                                                        &c_lnum, &c_loff,
2446                                                        &c_shft, sa_base, flags);
2447                         else
2448                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2449                                                    &shift, &wd.data128, &next);
2450
2451                         /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2452                         cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2453                                              cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2454                                              flags);
2455
2456                         /* Process mbuf1 */
2457                         dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2458                         if (vgetq_lane_u64(xtmp128, 1))
2459                                 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2460                                                        &cmd1[1], &next, c_laddr,
2461                                                        &c_lnum, &c_loff,
2462                                                        &c_shft, sa_base, flags);
2463                         else
2464                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2465                                                    &shift, &wd.data128, &next);
2466
2467                         /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2468                         cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2469                                              cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2470                                              flags);
2471
2472                         /* Process mbuf2 */
2473                         dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2474                         if (vgetq_lane_u64(ytmp128, 0))
2475                                 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2476                                                        &cmd1[2], &next, c_laddr,
2477                                                        &c_lnum, &c_loff,
2478                                                        &c_shft, sa_base, flags);
2479                         else
2480                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2481                                                    &shift, &wd.data128, &next);
2482
2483                         /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2484                         cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2485                                              cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2486                                              flags);
2487
2488                         /* Process mbuf3 */
2489                         dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2490                         if (vgetq_lane_u64(ytmp128, 1))
2491                                 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2492                                                        &cmd1[3], &next, c_laddr,
2493                                                        &c_lnum, &c_loff,
2494                                                        &c_shft, sa_base, flags);
2495                         else
2496                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2497                                                    &shift, &wd.data128, &next);
2498
2499                         /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2500                         cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2501                                              cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2502                                              flags);
2503
2504                 } else if (flags & NIX_TX_MULTI_SEG_F) {
2505                         uint8_t j;
2506
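                             /* segdw[4] = 8 looks like a sentinel so the
                              * helper's pairing lookahead stops at the fourth
                              * packet. The helper packs the four commands into
                              * LMT lines, folds per-line sizes into wd.data128
                              * at 'shift' and returns the lines consumed.
                              */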
2507                         segdw[4] = 8;
2508                         j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2509                                                           cmd2, cmd3, segdw,
2510                                                           (uint64_t *)
2511                                                           LMT_OFF(laddr, lnum,
2512                                                                   0),
2513                                                           &wd.data128, &shift,
2514                                                           flags);
2515                         lnum += j;
2516                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2517                         /* Store the prepared send desc to LMT lines */
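                             /* Timestamp mode stores 8 dwords (64B) per
                              * packet: HDR (cmd0), EXT (cmd2), SG (cmd1),
                              * MEM (cmd3), i.e. two packets per 128B LMT line;
                              * without it each packet is 6 dwords (48B):
                              * HDR, EXT, SG.
                              */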
2518                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2519                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2520                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2521                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2522                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2523                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2524                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2525                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2526                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2527                                 lnum += 1;
2528                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2529                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2530                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2531                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2532                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2533                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2534                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2535                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2536                         } else {
2537                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2538                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2539                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2540                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2541                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2542                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2543                                 lnum += 1;
2544                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2545                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2546                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2547                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2548                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2549                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2550                         }
2551                         lnum += 1;
2552                 } else {
2553                         /* Store the prepared send desc to LMT lines */
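                             /* No extension header: each packet is HDR + SG =
                              * 4 dwords (32B), so all four fit in a single
                              * 128B LMT line.
                              */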
2554                         vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2555                         vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2556                         vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2557                         vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2558                         vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2559                         vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2560                         vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2561                         vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2562                         lnum += 1;
2563                 }
2564
2565                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2566         }
2567
2568         /* Round up lnum to include the last LMT line if it is partial */
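         /* A non-zero loff means a partially filled CPT LMT line that must
          * still be counted; ((loff >> 4) - 1) & 0x7 packs its size (16B
          * units, minus one) into the 3-bit per-line field of the LMTST
          * data word at the current 'shift'.
          */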
2569         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2570                 lnum = lnum + !!loff;
2571                 wd.data128 = wd.data128 |
2572                         (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2573         }
2574
2575         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2576                 wd.data[0] >>= 16;
2577
2578         if (flags & NIX_TX_VWQE_F)
2579                 roc_sso_hws_head_wait(base);
2580
2581         left -= burst;
2582
2583         /* Submit CPT instructions if any */
2584         if (flags & NIX_TX_OFFLOAD_SECURITY_F)
2585                 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2586                                      c_shft);
2587
2588         /* Trigger LMTST */
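         /* A single STEORL covers at most 16 LMT lines, so larger bursts are
          * split: STEOR0 flushes the first 16 ((15ULL << 12) is the
          * lines-minus-one count), STEOR1 flushes the remaining lnum - 16
          * starting at lmt_id + 16.
          */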
2589         if (lnum > 16) {
2590                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2591                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2592
2593                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2594                 wd.data[0] &= ~0x7ULL;
2595
2596                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2597                         wd.data[0] <<= 16;
2598
2599                 wd.data[0] |= (15ULL << 12);
2600                 wd.data[0] |= (uint64_t)lmt_id;
2601
2602                 /* STEOR0 */
2603                 roc_lmt_submit_steorl(wd.data[0], pa);
2604
2605                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2606                         wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2607
2608                 pa = io_addr | (wd.data[1] & 0x7) << 4;
2609                 wd.data[1] &= ~0x7ULL;
2610
2611                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2612                         wd.data[1] <<= 16;
2613
2614                 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2615                 wd.data[1] |= (uint64_t)(lmt_id + 16);
2616
2617                 /* STEOR1 */
2618                 roc_lmt_submit_steorl(wd.data[1], pa);
2619         } else if (lnum) {
2620                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2621                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2622
2623                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2624                 wd.data[0] &= ~0x7ULL;
2625
2626                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2627                         wd.data[0] <<= 16;
2628
2629                 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2630                 wd.data[0] |= lmt_id;
2631
2632                 /* STEOR0 */
2633                 roc_lmt_submit_steorl(wd.data[0], pa);
2634         }
2635
2636         rte_io_wmb();
2637         if (left)
2638                 goto again;
2639
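         /* Finish any remainder (pkts not a multiple of NIX_DESCS_PER_LOOP)
          * through the scalar transmit paths.
          */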
2640         if (unlikely(scalar)) {
2641                 if (flags & NIX_TX_MULTI_SEG_F)
2642                         pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
2643                                                          scalar, cmd, base,
2644                                                          flags);
2645                 else
2646                         pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
2647                                                     cmd, base, flags);
2648         }
2649
2650         return pkts;
2651 }
2652
2653 #else
2654 static __rte_always_inline uint16_t
2655 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
2656                            uint16_t pkts, uint64_t *cmd, uintptr_t base,
2657                            const uint16_t flags)
2658 {
2659         RTE_SET_USED(tx_queue);
2660         RTE_SET_USED(tx_pkts);
2661         RTE_SET_USED(pkts);
2662         RTE_SET_USED(cmd);
2663         RTE_SET_USED(flags);
2664         RTE_SET_USED(base);
2665         return 0;
2666 }
2667 #endif
2668
2669 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
2670 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2671 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
2672 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
2673 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
2674 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
2675 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
2676
2677 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM]
      * Trailing columns: per-packet Tx command size in dwords (4 = HDR + SG,
      * 6 = + EXT, 8 = + EXT + MEM for timestamp) and the ORed offload flags.
      */
2678 #define NIX_TX_FASTPATH_MODES                                           \
2679 T(no_offload,                           0, 0, 0, 0, 0, 0, 0,    4,      \
2680                 NIX_TX_OFFLOAD_NONE)                                    \
2681 T(l3l4csum,                             0, 0, 0, 0, 0, 0, 1,    4,      \
2682                 L3L4CSUM_F)                                             \
2683 T(ol3ol4csum,                           0, 0, 0, 0, 0, 1, 0,    4,      \
2684                 OL3OL4CSUM_F)                                           \
2685 T(ol3ol4csum_l3l4csum,                  0, 0, 0, 0, 0, 1, 1,    4,      \
2686                 OL3OL4CSUM_F | L3L4CSUM_F)                              \
2687 T(vlan,                                 0, 0, 0, 0, 1, 0, 0,    6,      \
2688                 VLAN_F)                                                 \
2689 T(vlan_l3l4csum,                        0, 0, 0, 0, 1, 0, 1,    6,      \
2690                 VLAN_F | L3L4CSUM_F)                                    \
2691 T(vlan_ol3ol4csum,                      0, 0, 0, 0, 1, 1, 0,    6,      \
2692                 VLAN_F | OL3OL4CSUM_F)                                  \
2693 T(vlan_ol3ol4csum_l3l4csum,             0, 0, 0, 0, 1, 1, 1,    6,      \
2694                 VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
2695 T(noff,                                 0, 0, 0, 1, 0, 0, 0,    4,      \
2696                 NOFF_F)                                                 \
2697 T(noff_l3l4csum,                        0, 0, 0, 1, 0, 0, 1,    4,      \
2698                 NOFF_F | L3L4CSUM_F)                                    \
2699 T(noff_ol3ol4csum,                      0, 0, 0, 1, 0, 1, 0,    4,      \
2700                 NOFF_F | OL3OL4CSUM_F)                                  \
2701 T(noff_ol3ol4csum_l3l4csum,             0, 0, 0, 1, 0, 1, 1,    4,      \
2702                 NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
2703 T(noff_vlan,                            0, 0, 0, 1, 1, 0, 0,    6,      \
2704                 NOFF_F | VLAN_F)                                        \
2705 T(noff_vlan_l3l4csum,                   0, 0, 0, 1, 1, 0, 1,    6,      \
2706                 NOFF_F | VLAN_F | L3L4CSUM_F)                           \
2707 T(noff_vlan_ol3ol4csum,                 0, 0, 0, 1, 1, 1, 0,    6,      \
2708                 NOFF_F | VLAN_F | OL3OL4CSUM_F)                         \
2709 T(noff_vlan_ol3ol4csum_l3l4csum,        0, 0, 0, 1, 1, 1, 1,    6,      \
2710                 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
2711 T(tso,                                  0, 0, 1, 0, 0, 0, 0,    6,      \
2712                 TSO_F)                                                  \
2713 T(tso_l3l4csum,                         0, 0, 1, 0, 0, 0, 1,    6,      \
2714                 TSO_F | L3L4CSUM_F)                                     \
2715 T(tso_ol3ol4csum,                       0, 0, 1, 0, 0, 1, 0,    6,      \
2716                 TSO_F | OL3OL4CSUM_F)                                   \
2717 T(tso_ol3ol4csum_l3l4csum,              0, 0, 1, 0, 0, 1, 1,    6,      \
2718                 TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
2719 T(tso_vlan,                             0, 0, 1, 0, 1, 0, 0,    6,      \
2720                 TSO_F | VLAN_F)                                         \
2721 T(tso_vlan_l3l4csum,                    0, 0, 1, 0, 1, 0, 1,    6,      \
2722                 TSO_F | VLAN_F | L3L4CSUM_F)                            \
2723 T(tso_vlan_ol3ol4csum,                  0, 0, 1, 0, 1, 1, 0,    6,      \
2724                 TSO_F | VLAN_F | OL3OL4CSUM_F)                          \
2725 T(tso_vlan_ol3ol4csum_l3l4csum,         0, 0, 1, 0, 1, 1, 1,    6,      \
2726                 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
2727 T(tso_noff,                             0, 0, 1, 1, 0, 0, 0,    6,      \
2728                 TSO_F | NOFF_F)                                         \
2729 T(tso_noff_l3l4csum,                    0, 0, 1, 1, 0, 0, 1,    6,      \
2730                 TSO_F | NOFF_F | L3L4CSUM_F)                            \
2731 T(tso_noff_ol3ol4csum,                  0, 0, 1, 1, 0, 1, 0,    6,      \
2732                 TSO_F | NOFF_F | OL3OL4CSUM_F)                          \
2733 T(tso_noff_ol3ol4csum_l3l4csum,         0, 0, 1, 1, 0, 1, 1,    6,      \
2734                 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
2735 T(tso_noff_vlan,                        0, 0, 1, 1, 1, 0, 0,    6,      \
2736                 TSO_F | NOFF_F | VLAN_F)                                \
2737 T(tso_noff_vlan_l3l4csum,               0, 0, 1, 1, 1, 0, 1,    6,      \
2738                 TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
2739 T(tso_noff_vlan_ol3ol4csum,             0, 0, 1, 1, 1, 1, 0,    6,      \
2740                 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
2741 T(tso_noff_vlan_ol3ol4csum_l3l4csum,    0, 0, 1, 1, 1, 1, 1,    6,      \
2742                 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
2743 T(ts,                                   0, 1, 0, 0, 0, 0, 0,    8,      \
2744                 TSP_F)                                                  \
2745 T(ts_l3l4csum,                          0, 1, 0, 0, 0, 0, 1,    8,      \
2746                 TSP_F | L3L4CSUM_F)                                     \
2747 T(ts_ol3ol4csum,                        0, 1, 0, 0, 0, 1, 0,    8,      \
2748                 TSP_F | OL3OL4CSUM_F)                                   \
2749 T(ts_ol3ol4csum_l3l4csum,               0, 1, 0, 0, 0, 1, 1,    8,      \
2750                 TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
2751 T(ts_vlan,                              0, 1, 0, 0, 1, 0, 0,    8,      \
2752                 TSP_F | VLAN_F)                                         \
2753 T(ts_vlan_l3l4csum,                     0, 1, 0, 0, 1, 0, 1,    8,      \
2754                 TSP_F | VLAN_F | L3L4CSUM_F)                            \
2755 T(ts_vlan_ol3ol4csum,                   0, 1, 0, 0, 1, 1, 0,    8,      \
2756                 TSP_F | VLAN_F | OL3OL4CSUM_F)                          \
2757 T(ts_vlan_ol3ol4csum_l3l4csum,          0, 1, 0, 0, 1, 1, 1,    8,      \
2758                 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
2759 T(ts_noff,                              0, 1, 0, 1, 0, 0, 0,    8,      \
2760                 TSP_F | NOFF_F)                                         \
2761 T(ts_noff_l3l4csum,                     0, 1, 0, 1, 0, 0, 1,    8,      \
2762                 TSP_F | NOFF_F | L3L4CSUM_F)                            \
2763 T(ts_noff_ol3ol4csum,                   0, 1, 0, 1, 0, 1, 0,    8,      \
2764                 TSP_F | NOFF_F | OL3OL4CSUM_F)                          \
2765 T(ts_noff_ol3ol4csum_l3l4csum,          0, 1, 0, 1, 0, 1, 1,    8,      \
2766                 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
2767 T(ts_noff_vlan,                         0, 1, 0, 1, 1, 0, 0,    8,      \
2768                 TSP_F | NOFF_F | VLAN_F)                                \
2769 T(ts_noff_vlan_l3l4csum,                0, 1, 0, 1, 1, 0, 1,    8,      \
2770                 TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
2771 T(ts_noff_vlan_ol3ol4csum,              0, 1, 0, 1, 1, 1, 0,    8,      \
2772                 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
2773 T(ts_noff_vlan_ol3ol4csum_l3l4csum,     0, 1, 0, 1, 1, 1, 1,    8,      \
2774                 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
2775 T(ts_tso,                               0, 1, 1, 0, 0, 0, 0,    8,      \
2776                 TSP_F | TSO_F)                                          \
2777 T(ts_tso_l3l4csum,                      0, 1, 1, 0, 0, 0, 1,    8,      \
2778                 TSP_F | TSO_F | L3L4CSUM_F)                             \
2779 T(ts_tso_ol3ol4csum,                    0, 1, 1, 0, 0, 1, 0,    8,      \
2780                 TSP_F | TSO_F | OL3OL4CSUM_F)                           \
2781 T(ts_tso_ol3ol4csum_l3l4csum,           0, 1, 1, 0, 0, 1, 1,    8,      \
2782                 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)              \
2783 T(ts_tso_vlan,                          0, 1, 1, 0, 1, 0, 0,    8,      \
2784                 TSP_F | TSO_F | VLAN_F)                                 \
2785 T(ts_tso_vlan_l3l4csum,                 0, 1, 1, 0, 1, 0, 1,    8,      \
2786                 TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                    \
2787 T(ts_tso_vlan_ol3ol4csum,               0, 1, 1, 0, 1, 1, 0,    8,      \
2788                 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                  \
2789 T(ts_tso_vlan_ol3ol4csum_l3l4csum,      0, 1, 1, 0, 1, 1, 1,    8,      \
2790                 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2791 T(ts_tso_noff,                          0, 1, 1, 1, 0, 0, 0,    8,      \
2792                 TSP_F | TSO_F | NOFF_F)                                 \
2793 T(ts_tso_noff_l3l4csum,                 0, 1, 1, 1, 0, 0, 1,    8,      \
2794                 TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                    \
2795 T(ts_tso_noff_ol3ol4csum,               0, 1, 1, 1, 0, 1, 0,    8,      \
2796                 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                  \
2797 T(ts_tso_noff_ol3ol4csum_l3l4csum,      0, 1, 1, 1, 0, 1, 1,    8,      \
2798                 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2799 T(ts_tso_noff_vlan,                     0, 1, 1, 1, 1, 0, 0,    8,      \
2800                 TSP_F | TSO_F | NOFF_F | VLAN_F)                        \
2801 T(ts_tso_noff_vlan_l3l4csum,            0, 1, 1, 1, 1, 0, 1,    8,      \
2802                 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)           \
2803 T(ts_tso_noff_vlan_ol3ol4csum,          0, 1, 1, 1, 1, 1, 0,    8,      \
2804                 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)         \
2805 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 1,    8,      \
2806                 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2807 T(sec,                                  1, 0, 0, 0, 0, 0, 0,    4,      \
2808                 T_SEC_F)                                                \
2809 T(sec_l3l4csum,                         1, 0, 0, 0, 0, 0, 1,    4,      \
2810                 T_SEC_F | L3L4CSUM_F)                                   \
2811 T(sec_ol3ol4csum,                       1, 0, 0, 0, 0, 1, 0,    4,      \
2812                 T_SEC_F | OL3OL4CSUM_F)                                 \
2813 T(sec_ol3ol4csum_l3l4csum,              1, 0, 0, 0, 0, 1, 1,    4,      \
2814                 T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)                    \
2815 T(sec_vlan,                             1, 0, 0, 0, 1, 0, 0,    6,      \
2816                 T_SEC_F | VLAN_F)                                       \
2817 T(sec_vlan_l3l4csum,                    1, 0, 0, 0, 1, 0, 1,    6,      \
2818                 T_SEC_F | VLAN_F | L3L4CSUM_F)                          \
2819 T(sec_vlan_ol3ol4csum,                  1, 0, 0, 0, 1, 1, 0,    6,      \
2820                 T_SEC_F | VLAN_F | OL3OL4CSUM_F)                        \
2821 T(sec_vlan_ol3ol4csum_l3l4csum,         1, 0, 0, 0, 1, 1, 1,    6,      \
2822                 T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
2823 T(sec_noff,                             1, 0, 0, 1, 0, 0, 0,    4,      \
2824                 T_SEC_F | NOFF_F)                                       \
2825 T(sec_noff_l3l4csum,                    1, 0, 0, 1, 0, 0, 1,    4,      \
2826                 T_SEC_F | NOFF_F | L3L4CSUM_F)                          \
2827 T(sec_noff_ol3ol4csum,                  1, 0, 0, 1, 0, 1, 0,    4,      \
2828                 T_SEC_F | NOFF_F | OL3OL4CSUM_F)                        \
2829 T(sec_noff_ol3ol4csum_l3l4csum,         1, 0, 0, 1, 0, 1, 1,    4,      \
2830                 T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)           \
2831 T(sec_noff_vlan,                        1, 0, 0, 1, 1, 0, 0,    6,      \
2832                 T_SEC_F | NOFF_F | VLAN_F)                              \
2833 T(sec_noff_vlan_l3l4csum,               1, 0, 0, 1, 1, 0, 1,    6,      \
2834                 T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)                 \
2835 T(sec_noff_vlan_ol3ol4csum,             1, 0, 0, 1, 1, 1, 0,    6,      \
2836                 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)               \
2837 T(sec_noff_vlan_ol3ol4csum_l3l4csum,    1, 0, 0, 1, 1, 1, 1,    6,      \
2838                 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)  \
2839 T(sec_tso,                              1, 0, 1, 0, 0, 0, 0,    6,      \
2840                 T_SEC_F | TSO_F)                                        \
2841 T(sec_tso_l3l4csum,                     1, 0, 1, 0, 0, 0, 1,    6,      \
2842                 T_SEC_F | TSO_F | L3L4CSUM_F)                           \
2843 T(sec_tso_ol3ol4csum,                   1, 0, 1, 0, 0, 1, 0,    6,      \
2844                 T_SEC_F | TSO_F | OL3OL4CSUM_F)                         \
2845 T(sec_tso_ol3ol4csum_l3l4csum,          1, 0, 1, 0, 0, 1, 1,    6,      \
2846                 T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
2847 T(sec_tso_vlan,                         1, 0, 1, 0, 1, 0, 0,    6,      \
2848                 T_SEC_F | TSO_F | VLAN_F)                               \
2849 T(sec_tso_vlan_l3l4csum,                1, 0, 1, 0, 1, 0, 1,    6,      \
2850                 T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)                  \
2851 T(sec_tso_vlan_ol3ol4csum,              1, 0, 1, 0, 1, 1, 0,    6,      \
2852                 T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                \
2853 T(sec_tso_vlan_ol3ol4csum_l3l4csum,     1, 0, 1, 0, 1, 1, 1,    6,      \
2854                 T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
2855 T(sec_tso_noff,                         1, 0, 1, 1, 0, 0, 0,    6,      \
2856                 T_SEC_F | TSO_F | NOFF_F)                               \
2857 T(sec_tso_noff_l3l4csum,                1, 0, 1, 1, 0, 0, 1,    6,      \
2858                 T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)                  \
2859 T(sec_tso_noff_ol3ol4csum,              1, 0, 1, 1, 0, 1, 0,    6,      \
2860                 T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                \
2861 T(sec_tso_noff_ol3ol4csum_l3l4csum,     1, 0, 1, 1, 0, 1, 1,    6,      \
2862                 T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
2863 T(sec_tso_noff_vlan,                    1, 0, 1, 1, 1, 0, 0,    6,      \
2864                 T_SEC_F | TSO_F | NOFF_F | VLAN_F)                      \
2865 T(sec_tso_noff_vlan_l3l4csum,           1, 0, 1, 1, 1, 0, 1,    6,      \
2866                 T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)         \
2867 T(sec_tso_noff_vlan_ol3ol4csum,         1, 0, 1, 1, 1, 1, 0,    6,      \
2868                 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)       \
2869 T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 1,   6,      \
2870                 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2871 T(sec_ts,                               1, 1, 0, 0, 0, 0, 0,    8,      \
2872                 T_SEC_F | TSP_F)                                        \
2873 T(sec_ts_l3l4csum,                      1, 1, 0, 0, 0, 0, 1,    8,      \
2874                 T_SEC_F | TSP_F | L3L4CSUM_F)                           \
2875 T(sec_ts_ol3ol4csum,                    1, 1, 0, 0, 0, 1, 0,    8,      \
2876                 T_SEC_F | TSP_F | OL3OL4CSUM_F)                         \
2877 T(sec_ts_ol3ol4csum_l3l4csum,           1, 1, 0, 0, 0, 1, 1,    8,      \
2878                 T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
2879 T(sec_ts_vlan,                          1, 1, 0, 0, 1, 0, 0,    8,      \
2880                 T_SEC_F | TSP_F | VLAN_F)                               \
2881 T(sec_ts_vlan_l3l4csum,                 1, 1, 0, 0, 1, 0, 1,    8,      \
2882                 T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)                  \
2883 T(sec_ts_vlan_ol3ol4csum,               1, 1, 0, 0, 1, 1, 0,    8,      \
2884                 T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)                \
2885 T(sec_ts_vlan_ol3ol4csum_l3l4csum,      1, 1, 0, 0, 1, 1, 1,    8,      \
2886                 T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
2887 T(sec_ts_noff,                          1, 1, 0, 1, 0, 0, 0,    8,      \
2888                 T_SEC_F | TSP_F | NOFF_F)                               \
2889 T(sec_ts_noff_l3l4csum,                 1, 1, 0, 1, 0, 0, 1,    8,      \
2890                 T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)                  \
2891 T(sec_ts_noff_ol3ol4csum,               1, 1, 0, 1, 0, 1, 0,    8,      \
2892                 T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)                \
2893 T(sec_ts_noff_ol3ol4csum_l3l4csum,      1, 1, 0, 1, 0, 1, 1,    8,      \
2894                 T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)   \
2895 T(sec_ts_noff_vlan,                     1, 1, 0, 1, 1, 0, 0,    8,      \
2896                 T_SEC_F | TSP_F | NOFF_F | VLAN_F)                      \
2897 T(sec_ts_noff_vlan_l3l4csum,            1, 1, 0, 1, 1, 0, 1,    8,      \
2898                 T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)         \
2899 T(sec_ts_noff_vlan_ol3ol4csum,          1, 1, 0, 1, 1, 1, 0,    8,      \
2900                 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)       \
2901 T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 1,    8,      \
2902                 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2903 T(sec_ts_tso,                           1, 1, 1, 0, 0, 0, 0,    8,      \
2904                 T_SEC_F | TSP_F | TSO_F)                                \
2905 T(sec_ts_tso_l3l4csum,                  1, 1, 1, 0, 0, 0, 1,    8,      \
2906                 T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)                   \
2907 T(sec_ts_tso_ol3ol4csum,                1, 1, 1, 0, 0, 1, 0,    8,      \
2908                 T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)                 \
2909 T(sec_ts_tso_ol3ol4csum_l3l4csum,       1, 1, 1, 0, 0, 1, 1,    8,      \
2910                 T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
2911 T(sec_ts_tso_vlan,                      1, 1, 1, 0, 1, 0, 0,    8,      \
2912                 T_SEC_F | TSP_F | TSO_F | VLAN_F)                       \
2913 T(sec_ts_tso_vlan_l3l4csum,             1, 1, 1, 0, 1, 0, 1,    8,      \
2914                 T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)          \
2915 T(sec_ts_tso_vlan_ol3ol4csum,           1, 1, 1, 0, 1, 1, 0,    8,      \
2916                 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)        \
2917 T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum,  1, 1, 1, 0, 1, 1, 1,    8,      \
2918                 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
2919 T(sec_ts_tso_noff,                      1, 1, 1, 1, 0, 0, 0,    8,      \
2920                 T_SEC_F | TSP_F | TSO_F | NOFF_F)                       \
2921 T(sec_ts_tso_noff_l3l4csum,             1, 1, 1, 1, 0, 0, 1,    8,      \
2922                 T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)          \
2923 T(sec_ts_tso_noff_ol3ol4csum,           1, 1, 1, 1, 0, 1, 0,    8,      \
2924                 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)        \
2925 T(sec_ts_tso_noff_ol3ol4csum_l3l4csum,  1, 1, 1, 1, 0, 1, 1,    8,      \
2926                 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)\
2927 T(sec_ts_tso_noff_vlan,                 1, 1, 1, 1, 1, 0, 0,    8,      \
2928                 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)              \
2929 T(sec_ts_tso_noff_vlan_l3l4csum,        1, 1, 1, 1, 1, 0, 1,    8,      \
2930                 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
2931 T(sec_ts_tso_noff_vlan_ol3ol4csum,      1, 1, 1, 1, 1, 1, 0,    8,      \
2932                 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)\
2933 T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 1, 8,     \
2934                 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \
2935                 L3L4CSUM_F)
2936
2937 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                         \
2938         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
2939                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2940                                                                                \
2941         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
2942                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2943                                                                                \
2944         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
2945                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2946                                                                                \
2947         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
2948                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2949
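     /* Expanding NIX_TX_FASTPATH_MODES through T() declares four Tx variants
      * per mode; e.g. the 'vlan' entry expands to:
      *
      *   uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vlan(
      *           void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
      *
      * plus the _mseg_, _vec_ and _vec_mseg_ counterparts with the same
      * signature.
      */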
2950 NIX_TX_FASTPATH_MODES
2951 #undef T
2952
2953 #endif /* __CN10K_TX_H__ */