/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flags to control xmit_prepare function.
 * Defined from the MSB end to denote that they are
 * not used as offload flags to pick the transmit function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, Update the fc_cache_pkts */            \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply with sqe_per_sqb to express in pkts */     \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check it again for the room */                      \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
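
/* Illustrative arithmetic for the refresh above (hypothetical values, and
 * assuming *fc_mem counts SQBs consumed by hardware): with
 * nb_sqb_bufs_adj = 512, *fc_mem = 500 and sqes_per_sqb_log2 = 5
 * (32 SQEs per SQB), the refreshed cache is (512 - 500) << 5 = 384,
 * i.e. room for 384 more packets before the burst is rejected.
 */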

/* Encoded number of segments to number of dwords macro; each value of
 * nb_segs is encoded as 4 bits.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
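
/* Decoding the magic constant nibble by nibble yields the table below
 * (pure arithmetic on 0x76654432210):
 *
 *     nb_segs : 0 1 2 3 4 5 6 7 8 9 10
 *     segdw   : 0 1 2 2 3 4 4 5 6 6  7
 *
 * One SG subdescriptor covers up to three segments (one header dword plus
 * up to three pointer dwords), and the result is in 16B (2-dword) units,
 * rounded up.
 */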

/* Function to determine the number of Tx sub-descriptors required when the
 * ext sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                             2 :
                             ((flags &
                         (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
                                      1 :
                                      0);
}
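
/* Reading of the above: a timestamped send needs both SEND_EXT and
 * SEND_MEM sub-descriptors (2), VLAN/QinQ insertion or TSO needs only
 * SEND_EXT (1), and anything else needs no extra sub-descriptor (0).
 */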

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Already everything is accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                             ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
                             8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
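
/* The unrolled writes above are equivalent to this loop (illustrative
 * only, not part of the driver):
 *
 *     data = dw_m1;
 *     for (int i = 19; i <= 61; i += 3)
 *             data |= dw_m1 << i;
 *
 * i.e. one 3-bit "dwords minus one" size field per LMT line, 15 of them
 * packed from bit 19 upward, plus the first line's size in the low bits.
 */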

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                              (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
                              4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags, const uint16_t static_sz)
{
        if (static_sz)
                cmd[0] = txq->send_hdr_w0;
        else
                cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                        cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
                else
                        cmd[2] = NIX_SUBDC_EXT << 60;
                cmd[3] = 0;
                cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        } else {
                cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
}
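
/* Resulting command layout from the skeleton above (dword indices into
 * cmd[]): with NIX_TX_NEED_EXT_HDR, cmd[0..1] hold SEND_HDR, cmd[2..3]
 * SEND_EXT and cmd[4..5] SEND_SG plus its pointer slot; without it,
 * SEND_SG starts at cmd[2]. BIT_ULL(48) appears to preset the SG
 * subdescriptor's segment count to one, the field the multi-seg path
 * later rewrites through sg->segs.
 */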

static __rte_always_inline void
cn10k_nix_sec_fc_wait(struct cn10k_eth_txq *txq, uint16_t nb_pkts)
{
        int32_t nb_desc, val, newval;
        int32_t *fc_sw;
        volatile uint64_t *fc;

        /* Check if there is any CPT instruction to submit */
        if (!nb_pkts)
                return;

again:
        fc_sw = txq->cpt_fc_sw;
        val = __atomic_sub_fetch(fc_sw, nb_pkts, __ATOMIC_RELAXED);
        if (likely(val >= 0))
                return;

        nb_desc = txq->cpt_desc;
        fc = txq->cpt_fc;
        while (true) {
                newval = nb_desc - __atomic_load_n(fc, __ATOMIC_RELAXED);
                newval -= nb_pkts;
                if (newval >= 0)
                        break;
        }

        if (!__atomic_compare_exchange_n(fc_sw, &val, newval, false,
                                         __ATOMIC_RELAXED, __ATOMIC_RELAXED))
                goto again;
}
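
/* Concurrency note for the wait above (reading of the code): the software
 * counter is decremented optimistically; on underflow the thread spins
 * re-deriving the real room from the hardware count, then publishes it
 * with a compare-and-swap and retries from scratch if another thread
 * raced the update.
 */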

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update lmtline use for partial end line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e., 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Update lines - 1 that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}
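
/* Layout of the STEORL data word as constructed above (derived from the
 * code): bits [18:16] hold the first line's "dwords minus one" size and
 * are moved into I/O address bits [6:4]; bits [15:12] hold the number of
 * LMT lines minus one; the low bits carry the LMT ID. A partially filled
 * last line has its 3-bit size field overwritten with 0x3, i.e. half a
 * 128B line (64B), which fits a single CPT instruction.
 */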

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint8_t l3l4type, chksum;
        uint64x2_t cmd01, cmd23;
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                /* Extract l3l4type either from il3il4type or ol3ol4type */
                if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
                    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                        l2_len = vgetq_lane_u8(*cmd0, 10);
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = vgetq_lane_u8(*cmd0, 11) - l2_len;
                        l3l4type = vgetq_lane_u8(*cmd0, 13);
                } else {
                        l2_len = vgetq_lane_u8(*cmd0, 8);
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = vgetq_lane_u8(*cmd0, 9) - l2_len;
                        l3l4type = vgetq_lane_u8(*cmd0, 12);
                }

                chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
                chksum = ~chksum;
                sess_priv.chksum = sess_priv.chksum & chksum;
                /* Clear SEND header flags */
                *cmd0 = vsetq_lane_u16(0, *cmd0, 6);
        } else {
                l2_len = m->l2_len;
                l3_len = m->l3_len;
        }

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        /* Exclude l3 len from roundup for transport mode */
        dlen_adj -= sess_priv.mode ? 0 : l3_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
                        ((uint64_t)sess_priv.chksum) << 32 |
                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }

        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}
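
/* LMT bookkeeping in the helper above (reading of the code): two CPT
 * instructions share one 128B LMT line, so loff toggles between the two
 * 64B halves, lnum advances to the next line on every second instruction,
 * and shft advances by 3 so the next line's size lands in the right field
 * of the STEORL data word.
 */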

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint8_t l3l4type, chksum;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint16_t tag;
        uint64_t sa;

        /* Move to our line from base */
        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                /* Extract l3l4type either from il3il4type or ol3ol4type */
                if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
                    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                        l2_len = (cmd[1] >> 16) & 0xFF;
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = ((cmd[1] >> 24) & 0xFF) - l2_len;
                        l3l4type = (cmd[1] >> 40) & 0xFF;
                } else {
                        l2_len = cmd[1] & 0xFF;
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = ((cmd[1] >> 8) & 0xFF) - l2_len;
                        l3l4type = (cmd[1] >> 32) & 0xFF;
                }

                chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
                chksum = ~chksum;
                sess_priv.chksum = sess_priv.chksum & chksum;
                /* Clear SEND header flags */
                cmd[1] &= ~(0xFFFFUL << 32);
        } else {
                l2_len = m->l2_len;
                l3_len = m->l3_len;
        }

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        /* Exclude l3 len from roundup for transport mode */
        dlen_adj -= sess_priv.mode ? 0 : l3_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed is excluding L2HDR */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
                        ((uint64_t)sess_priv.chksum) << 32 |
                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
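
/* Worked example for the plain (non-tunnel) path above, with hypothetical
 * header sizes: a 1514B TCPv4 packet with l2_len = 14, l3_len = 20 and
 * l4_len = 20 gives lso_sb = 54 and paylen = 1460. iplen then points at
 * mdata + 14 + 2 (the IPv4 total-length field sits 2B into the IP header;
 * for IPv6 the payload-length field sits 4B in, hence the
 * 2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6) offset), and the stored length
 * drops from 1500 to 40 so the header template covers headers only.
 */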

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
                       uint64_t mark_fmt)
{
        uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;
        uint16_t mark_form = 0;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w0.mark_en = 0;
                send_hdr_ext->w1.u = 0;
                ol_flags = m->ol_flags;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* In case of no tunnel header, shift the IL3/IL4 fields
                 * down into OL3/OL4 so they are used for the header
                 * checksum.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
                                                  RTE_MBUF_F_TX_IPV6));

                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
                /* Fill for VLAN marking only when VLAN insertion enabled */
                mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
                             (send_hdr_ext->w1.vlan1_ins_ena ||
                              send_hdr_ext->w1.vlan0_ins_ena));

                /* Mask requested flags with packet data information */
                mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
                mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

                mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
                mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
                markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

                send_hdr_ext->w0.mark_en = !!mark_off;
                send_hdr_ext->w0.markform = mark_form & 0x7F;
                send_hdr_ext->w0.markptr = markptr;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be in stack. Intent is
         * 'cmd' stores content from txq->cmd which is copied only once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp =
                        !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                /* For packets without RTE_MBUF_F_TX_IEEE1588_TMST set, the Tx
                 * tstamp should not be recorded: change the alg type to
                 * NIX_SENDMEMALG_SUB and point the send mem addr field at the
                 * next 8 bytes so the actual registered Tx tstamp address is
                 * not corrupted.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg =
                        NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
        }
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
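
/* Worked example (hypothetical three-segment chain, no EXT header, no
 * timestamp): off = 0, the SG subdescriptor sits at cmd[2] with the first
 * pointer already at cmd[3], and the loop appends two more pointers at
 * cmd[4..5]. That is 4 dwords of SG area -> 2 after rounding to 16B
 * units, plus 1 unit for the send header, so w0.sizem1 = 2 and the
 * function returns segdw = 3.
 */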

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        uint64_t lso_tun_fmt = 0;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        rte_iova_t c_io_addr;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}
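
/* Note on the LMTST trigger above (reading of the code): a burst larger
 * than 16 packets is flushed as two STEORLs, the first covering LMT lines
 * 0..15 (15ULL << 12 encodes "16 lines minus one") and the second
 * covering the remaining burst - 16 lines starting at lmt_id + 16.
 */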

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
                         struct rte_mbuf **tx_pkts, uint16_t pkts,
                         uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t lso_tun_fmt = 0;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO, barrier at
                 * lmt steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              segdw, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}
1220
1221 #if defined(RTE_ARCH_ARM64)
1222
1223 static __rte_always_inline void
1224 cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
1225                       union nix_send_ext_w0_u *w0, uint64_t ol_flags,
1226                       const uint64_t flags, const uint64_t lso_tun_fmt)
1227 {
1228         uint16_t lso_sb;
1229         uint64_t mask;
1230
1231         if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
1232                 return;
1233
1234         mask = -(!w1->il3type);
1235         lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
1236
1237         w0->u |= BIT(14);
1238         w0->lso_sb = lso_sb;
1239         w0->lso_mps = m->tso_segsz;
1240         w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
1241         w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
1242
1243         /* Handle tunnel tso */
1244         if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
1245             (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
1246                 const uint8_t is_udp_tun =
1247                         (CNXK_NIX_UDP_TUN_BITMASK >>
1248                          ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
1249                         0x1;
1250                 uint8_t shift = is_udp_tun ? 32 : 0;
1251
1252                 shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
1253                 shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
1254
1255                 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
1256                 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
1257                 /* Update format for UDP tunneled packet */
1258
1259                 w0->lso_format = (lso_tun_fmt >> shift);
1260         }
1261 }
1262
1263 static __rte_always_inline void
1264 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
1265                                 union nix_send_hdr_w0_u *sh,
1266                                 union nix_send_sg_s *sg, const uint32_t flags)
1267 {
1268         struct rte_mbuf *m_next;
1269         uint64_t *slist, sg_u;
1270         uint16_t nb_segs;
1271         int i = 1;
1272
1273         sh->total = m->pkt_len;
1274         /* Clear sg->u header before use */
1275         sg->u &= 0xFC00000000000000;
1276         sg_u = sg->u;
1277         slist = &cmd[0];
1278
1279         sg_u = sg_u | ((uint64_t)m->data_len);
1280
1281         nb_segs = m->nb_segs - 1;
1282         m_next = m->next;
1283
1284         /* Set invert df if buffer is not to be freed by H/W */
1285         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1286                 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
1287                 /* Mark mempool object as "put" since it is freed by NIX */
1288 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1289         if (!(sg_u & (1ULL << 55)))
1290                 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1291         rte_io_wmb();
1292 #endif
1293
1294         m = m_next;
1295         /* Fill mbuf segments */
1296         do {
1297                 m_next = m->next;
1298                 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
1299                 *slist = rte_mbuf_data_iova(m);
1300                 /* Set invert df if buffer is not to be freed by H/W */
1301                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1302                         sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
1303                         /* Mark mempool object as "put" since it is freed by NIX
1304                          */
1305 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1306                 if (!(sg_u & (1ULL << (i + 55))))
1307                         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1308                 rte_io_wmb();
1309 #endif
1310                 slist++;
1311                 i++;
1312                 nb_segs--;
1313                 if (i > 2 && nb_segs) {
1314                         i = 0;
1315                         /* Next SG subdesc */
1316                         *(uint64_t *)slist = sg_u & 0xFC00000000000000;
1317                         sg->u = sg_u;
1318                         sg->segs = 3;
1319                         sg = (union nix_send_sg_s *)slist;
1320                         sg_u = sg->u;
1321                         slist++;
1322                 }
1323                 m = m_next;
1324         } while (nb_segs);
1325
1326         sg->u = sg_u;
1327         sg->segs = i;
1328 }
1329
1330 static __rte_always_inline void
1331 cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
1332                            uint64x2_t *cmd1, const uint8_t segdw,
1333                            const uint32_t flags)
1334 {
1335         union nix_send_hdr_w0_u sh;
1336         union nix_send_sg_s sg;
1337
1338         if (m->nb_segs == 1) {
1339                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1340                         sg.u = vgetq_lane_u64(cmd1[0], 0);
1341                         sg.u |= (cnxk_nix_prefree_seg(m) << 55);
1342                         cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1343                 }
1344
1345 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1346                 sg.u = vgetq_lane_u64(cmd1[0], 0);
1347                 if (!(sg.u & (1ULL << 55)))
1348                         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1349                 rte_io_wmb();
1350 #endif
1351                 return;
1352         }
1353
1354         sh.u = vgetq_lane_u64(cmd0[0], 0);
1355         sg.u = vgetq_lane_u64(cmd1[0], 0);
1356
1357         cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
1358
1359         sh.sizem1 = segdw - 1;
1360         cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
1361         cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1362 }
1363
1364 #define NIX_DESCS_PER_LOOP 4
1365
1366 static __rte_always_inline uint8_t
1367 cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1368                                uint64x2_t *cmd1, uint64x2_t *cmd2,
1369                                uint64x2_t *cmd3, uint8_t *segdw,
1370                                uint64_t *lmt_addr, __uint128_t *data128,
1371                                uint8_t *shift, const uint16_t flags)
1372 {
1373         uint8_t j, off, lmt_used;
1374
1375         if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1376             !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1377                 /* No extra segments in any of the 4 consecutive packets. */
1378                 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1379                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1380                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1381                                                            &cmd0[j], &cmd1[j],
1382                                                            segdw[j], flags);
1383                         vst1q_u64(lmt_addr, cmd0[0]);
1384                         vst1q_u64(lmt_addr + 2, cmd1[0]);
1385                         vst1q_u64(lmt_addr + 4, cmd0[1]);
1386                         vst1q_u64(lmt_addr + 6, cmd1[1]);
1387                         vst1q_u64(lmt_addr + 8, cmd0[2]);
1388                         vst1q_u64(lmt_addr + 10, cmd1[2]);
1389                         vst1q_u64(lmt_addr + 12, cmd0[3]);
1390                         vst1q_u64(lmt_addr + 14, cmd1[3]);
1391
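                        /* A full LMTLINE is consumed here: 16 u64s = 8
                         * dwords, and the per-line size field is encoded as
                         * dwords - 1, hence the 3-bit value 7 below.
                         */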
1392                         *data128 |= ((__uint128_t)7) << *shift;
1393                         *shift += 3;
1394
1395                         return 1;
1396                 }
1397         }
1398
1399         lmt_used = 0;
1400         for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1401                 /* Fit consecutive packets in same LMTLINE. */
1402                 if ((segdw[j] + segdw[j + 1]) <= 8) {
1403                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1404                                 cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1405                                                            &cmd0[j], &cmd1[j],
1406                                                            segdw[j], flags);
1407                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1408                                                            &cmd0[j + 1],
1409                                                            &cmd1[j + 1],
1410                                                            segdw[j + 1], flags);
1411                                 /* TSTAMP takes 4 dwords each, no extra segs. */
1412                                 vst1q_u64(lmt_addr, cmd0[j]);
1413                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1414                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1415                                 vst1q_u64(lmt_addr + 6, cmd3[j]);
1416
1417                                 vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1418                                 vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1419                                 vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1420                                 vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1421                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1422                                 /* EXT header takes 3 each, space for 2 segs. */
1423                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1424                                                            lmt_addr + 6,
1425                                                            &cmd0[j], &cmd1[j],
1426                                                            segdw[j], flags);
1427                                 vst1q_u64(lmt_addr, cmd0[j]);
1428                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1429                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1430                                 off = segdw[j] - 3;
1431                                 off <<= 1;
1432                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1433                                                            lmt_addr + 12 + off,
1434                                                            &cmd0[j + 1],
1435                                                            &cmd1[j + 1],
1436                                                            segdw[j + 1], flags);
1437                                 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1438                                 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1439                                 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1440                         } else {
1441                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1442                                                            lmt_addr + 4,
1443                                                            &cmd0[j], &cmd1[j],
1444                                                            segdw[j], flags);
1445                                 vst1q_u64(lmt_addr, cmd0[j]);
1446                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1447                                 off = segdw[j] - 2;
1448                                 off <<= 1;
1449                                 cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1450                                                            lmt_addr + 8 + off,
1451                                                            &cmd0[j + 1],
1452                                                            &cmd1[j + 1],
1453                                                            segdw[j + 1], flags);
1454                                 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1455                                 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1456                         }
1457                         *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1458                                     << *shift;
1459                         *shift += 3;
1460                         j += 2;
1461                 } else {
1462                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
1463                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1464                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1465                                                            lmt_addr + 6,
1466                                                            &cmd0[j], &cmd1[j],
1467                                                            segdw[j], flags);
1468                                 vst1q_u64(lmt_addr, cmd0[j]);
1469                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1470                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1471                                 off = segdw[j] - 4;
1472                                 off <<= 1;
1473                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1474                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1475                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1476                                                            lmt_addr + 6,
1477                                                            &cmd0[j], &cmd1[j],
1478                                                            segdw[j], flags);
1479                                 vst1q_u64(lmt_addr, cmd0[j]);
1480                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
1481                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
1482                         } else {
1483                                 cn10k_nix_prepare_mseg_vec(mbufs[j],
1484                                                            lmt_addr + 4,
1485                                                            &cmd0[j], &cmd1[j],
1486                                                            segdw[j], flags);
1487                                 vst1q_u64(lmt_addr, cmd0[j]);
1488                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
1489                         }
1490                         *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1491                         *shift += 3;
1492                         j++;
1493                 }
1494                 lmt_used++;
1495                 lmt_addr += 16;
1496         }
1497
1498         return lmt_used;
1499 }
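
/* Note: data128 accumulates one 3-bit "dwords - 1" field per LMTLINE used,
 * packed upward from bit *shift. For illustration, segdw = {2, 7, 3, 5}
 * would consume three lines encoded as 1 (2 dwords), 6 (7 dwords) and
 * 7 (3 + 5 dwords paired), in that order.
 */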
1500
1501 static __rte_always_inline void
1502 cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1503                    uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1504 {
1505         /* Go to next line if we are out of space */
1506         if ((*loff + (dw << 4)) > 128) {
1507                 *data128 = *data128 |
1508                            (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1509                 *shift = *shift + 3;
1510                 *loff = 0;
1511                 *lnum = *lnum + 1;
1512         }
1513
1514         *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1515         *loff = *loff + (dw << 4);
1516 }
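
/* Worked example for the rollover above: with *loff = 112 and a 2-dword
 * (32-byte) descriptor, 112 + 32 > 128 closes the current line, records
 * (112 >> 4) - 1 = 6 as its size and places the descriptor at offset 0 of
 * the next line.
 */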
1517
1518 static __rte_always_inline void
1519 cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1520                      uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1521                      uint64x2_t cmd3, const uint16_t flags)
1522 {
1523         uint8_t off;
1524
1525         /* Handle no fast free when security is enabled without mseg */
1526         if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1527             (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1528             !(flags & NIX_TX_MULTI_SEG_F)) {
1529                 union nix_send_sg_s sg;
1530
1531                 sg.u = vgetq_lane_u64(cmd1, 0);
1532                 sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1533                 cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1534
1535 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1536                 sg.u = vgetq_lane_u64(cmd1, 0);
1537                 if (!(sg.u & (1ULL << 55)))
1538                         RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1539                                                 0);
1540                 rte_io_wmb();
1541 #endif
1542         }
1543         if (flags & NIX_TX_MULTI_SEG_F) {
1544                 if ((flags & NIX_TX_NEED_EXT_HDR) &&
1545                     (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1546                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1547                                                    &cmd0, &cmd1, segdw, flags);
1548                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1549                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1550                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1551                         off = segdw - 4;
1552                         off <<= 4;
1553                         vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1554                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1555                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1556                                                    &cmd0, &cmd1, segdw, flags);
1557                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1558                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1559                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1560                 } else {
1561                         cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1562                                                    &cmd0, &cmd1, segdw, flags);
1563                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1564                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1565                 }
1566         } else if (flags & NIX_TX_NEED_EXT_HDR) {
1567                 /* Store the prepared send desc to LMT lines */
1568                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1569                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1570                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1571                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1572                         vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1573                 } else {
1574                         vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1575                         vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1576                         vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1577                 }
1578         } else {
1579                 /* Store the prepared send desc to LMT lines */
1580                 vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1581                 vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1582         }
1583 }
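
/* Summary of the store layout above, as byte offsets within the LMTLINE:
 *   mseg + EXT + TSTAMP: HDR@0, EXT@16, SG@32, extra SG words from 48,
 *                        MEM after the last segment
 *   mseg + EXT:          HDR@0, EXT@16, SG@32, extra SG words from 48
 *   mseg:                HDR@0, SG@16, extra SG words from 32
 *   EXT (+ TSTAMP):      HDR@0, EXT@16, SG@32 (, MEM@48)
 *   default:             HDR@0, SG@16
 */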
1584
1585 static __rte_always_inline uint16_t
1586 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1587                            struct rte_mbuf **tx_pkts, uint16_t pkts,
1588                            uint64_t *cmd, const uint16_t flags)
1589 {
1590         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1591         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1592         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1593                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1594         uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1595         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1596         uint64x2_t senddesc01_w0, senddesc23_w0;
1597         uint64x2_t senddesc01_w1, senddesc23_w1;
1598         uint64x2_t sendext01_w0, sendext23_w0;
1599         uint64x2_t sendext01_w1, sendext23_w1;
1600         uint64x2_t sendmem01_w0, sendmem23_w0;
1601         uint64x2_t sendmem01_w1, sendmem23_w1;
1602         uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1603         uint64x2_t sgdesc01_w0, sgdesc23_w0;
1604         uint64x2_t sgdesc01_w1, sgdesc23_w1;
1605         struct cn10k_eth_txq *txq = tx_queue;
1606         rte_iova_t io_addr = txq->io_addr;
1607         uintptr_t laddr = txq->lmt_base;
1608         uint8_t c_lnum, c_shft, c_loff;
1609         uint64x2_t ltypes01, ltypes23;
1610         uint64x2_t xtmp128, ytmp128;
1611         uint64x2_t xmask01, xmask23;
1612         uintptr_t c_laddr = laddr;
1613         uint8_t lnum, shift, loff;
1614         rte_iova_t c_io_addr;
1615         uint64_t sa_base;
1616         union wdata {
1617                 __uint128_t data128;
1618                 uint64_t data[2];
1619         } wd;
1620
1621         if (!(flags & NIX_TX_VWQE_F)) {
1622                 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1623                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1624                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1625                 /* Reduce the cached count */
1626                 txq->fc_cache_pkts -= pkts;
1627         } else {
1628                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1629                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1630         }
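        /* Note: "scalar" holds the remainder modulo NIX_DESCS_PER_LOOP;
         * only the 4-aligned count is handled by the vector loop, the
         * leftover is handled separately after it.
         */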
1631
1632         /* Perform header writes before barrier for TSO */
1633         if (flags & NIX_TX_OFFLOAD_TSO_F) {
1634                 for (i = 0; i < pkts; i++)
1635                         cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1636         }
1637
1638         if (!(flags & NIX_TX_VWQE_F)) {
1639                 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1640         } else {
1641                 uint64_t w0 =
1642                         (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1643                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1644
1645                 senddesc01_w0 = vdupq_n_u64(w0);
1646         }
1647         senddesc23_w0 = senddesc01_w0;
1648
1649         senddesc01_w1 = vdupq_n_u64(0);
1650         senddesc23_w1 = senddesc01_w1;
1651         sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1652         sgdesc23_w0 = sgdesc01_w0;
1653
1654         if (flags & NIX_TX_NEED_EXT_HDR) {
1655                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1656                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1657                                                    BIT_ULL(15));
1658                         sendmem01_w0 =
1659                                 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1660                                             (NIX_SENDMEMALG_SETTSTMP << 56));
1661                         sendmem23_w0 = sendmem01_w0;
1662                         sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1663                         sendmem23_w1 = sendmem01_w1;
1664                 } else {
1665                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1666                 }
1667                 sendext23_w0 = sendext01_w0;
1668
1669                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1670                         sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1671                 else
1672                         sendext01_w1 = vdupq_n_u64(0);
1673                 sendext23_w1 = sendext01_w1;
1674         }
1675
1676         /* Get LMT base address and LMT ID as lcore id */
1677         ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1678         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1679                 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1680                 c_io_addr = txq->cpt_io_addr;
1681                 sa_base = txq->sa_base;
1682         }
1683
1684         left = pkts;
1685 again:
1686         /* Number of packets to prepare depends on offloads enabled. */
1687         burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1688                               cn10k_nix_pkts_per_vec_brst(flags) :
1689                               left;
1690         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1691                 wd.data128 = 0;
1692                 shift = 16;
1693         }
1694         lnum = 0;
1695         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1696                 loff = 0;
1697                 c_loff = 0;
1698                 c_lnum = 0;
1699                 c_shft = 16;
1700         }
1701
1702         for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1703                 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1704                         burst = i;
1705                         break;
1706                 }
1707
1708                 if (flags & NIX_TX_MULTI_SEG_F) {
1709                         uint8_t j;
1710
1711                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1712                                 struct rte_mbuf *m = tx_pkts[j];
1713
1714                                 /* Get dwords based on nb_segs. */
1715                                 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1716                                 /* Add dwords based on offloads. */
1717                                 segdw[j] += 1 + /* SEND HDR */
1718                                             !!(flags & NIX_TX_NEED_EXT_HDR) +
1719                                             !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1720                         }
1721
1722                         /* Check if there are enough LMTLINES for this loop */
1723                         if (lnum + 4 > 32) {
1724                                 uint8_t ldwords_con = 0, lneeded = 0;
1725                                 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1726                                         ldwords_con += segdw[j];
1727                                         if (ldwords_con > 8) {
1728                                                 lneeded += 1;
1729                                                 ldwords_con = segdw[j];
1730                                         }
1731                                 }
1732                                 lneeded += 1;
1733                                 if (lnum + lneeded > 32) {
1734                                         burst = i;
1735                                         break;
1736                                 }
1737                         }
1738                 }
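                /* Note: the "lnum + 4 > 32" test above is a cheap
                 * conservative trigger; only near the 32-LMTLINE budget is
                 * the exact number of lines recomputed, using the same
                 * 8-dwords-per-line packing rule as the store path.
                 */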
1739                 /* Clear lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
1740                 senddesc01_w0 =
1741                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1742                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1743
1744                 senddesc23_w0 = senddesc01_w0;
1745                 sgdesc23_w0 = sgdesc01_w0;
1746
1747                 /* Clear vlan enables. */
1748                 if (flags & NIX_TX_NEED_EXT_HDR) {
1749                         sendext01_w1 = vbicq_u64(sendext01_w1,
1750                                                  vdupq_n_u64(0x3FFFF00FFFF00));
1751                         sendext23_w1 = sendext01_w1;
1752                 }
1753
1754                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1755                         /* Reset send mem alg to SETTSTMP from SUB */
1756                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
1757                                                  vdupq_n_u64(BIT_ULL(59)));
1758                         /* Reset send mem address to default. */
1759                         sendmem01_w1 =
1760                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1761                         sendmem23_w0 = sendmem01_w0;
1762                         sendmem23_w1 = sendmem01_w1;
1763                 }
1764
1765                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1766                         /* Clear the LSO enable bit. */
1767                         sendext01_w0 = vbicq_u64(sendext01_w0,
1768                                                  vdupq_n_u64(BIT_ULL(14)));
1769                         sendext23_w0 = sendext01_w0;
1770                 }
1771
1772                 /* Move mbufs to iova */
1773                 mbuf0 = (uint64_t *)tx_pkts[0];
1774                 mbuf1 = (uint64_t *)tx_pkts[1];
1775                 mbuf2 = (uint64_t *)tx_pkts[2];
1776                 mbuf3 = (uint64_t *)tx_pkts[3];
1777
1778                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1779                                      offsetof(struct rte_mbuf, buf_iova));
1780                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1781                                      offsetof(struct rte_mbuf, buf_iova));
1782                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1783                                      offsetof(struct rte_mbuf, buf_iova));
1784                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1785                                      offsetof(struct rte_mbuf, buf_iova));
1786                 /*
1787                  * Get each mbuf's olflags, iova, pktlen, dataoff:
1788                  * dataoff_iovaX.D[0] = iova,
1789                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1790                  * len_olflagsX.D[0] = ol_flags,
1791                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1792                  */
1793                 dataoff_iova0 = vld1q_u64(mbuf0);
1794                 len_olflags0 = vld1q_u64(mbuf0 + 2);
1795                 dataoff_iova1 = vld1q_u64(mbuf1);
1796                 len_olflags1 = vld1q_u64(mbuf1 + 2);
1797                 dataoff_iova2 = vld1q_u64(mbuf2);
1798                 len_olflags2 = vld1q_u64(mbuf2 + 2);
1799                 dataoff_iova3 = vld1q_u64(mbuf3);
1800                 len_olflags3 = vld1q_u64(mbuf3 + 2);
1801
1802                 /* Move mbufs to point to pool */
1803                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1804                                      offsetof(struct rte_mbuf, pool) -
1805                                      offsetof(struct rte_mbuf, buf_iova));
1806                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1807                                      offsetof(struct rte_mbuf, pool) -
1808                                      offsetof(struct rte_mbuf, buf_iova));
1809                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1810                                      offsetof(struct rte_mbuf, pool) -
1811                                      offsetof(struct rte_mbuf, buf_iova));
1812                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1813                                      offsetof(struct rte_mbuf, pool) -
1814                                      offsetof(struct rte_mbuf, buf_iova));
1815
1816                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1817                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1818                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1819                         /*
1820                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1821                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1822                          */
1823
1824                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1825                                      : [a] "+w"(senddesc01_w1)
1826                                      : [in] "r"(mbuf0 + 2)
1827                                      : "memory");
1828
1829                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1830                                      : [a] "+w"(senddesc01_w1)
1831                                      : [in] "r"(mbuf1 + 2)
1832                                      : "memory");
1833
1834                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1835                                      : [b] "+w"(senddesc23_w1)
1836                                      : [in] "r"(mbuf2 + 2)
1837                                      : "memory");
1838
1839                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1840                                      : [b] "+w"(senddesc23_w1)
1841                                      : [in] "r"(mbuf3 + 2)
1842                                      : "memory");
1843
1844                         /* Get pool pointer alone */
1845                         mbuf0 = (uint64_t *)*mbuf0;
1846                         mbuf1 = (uint64_t *)*mbuf1;
1847                         mbuf2 = (uint64_t *)*mbuf2;
1848                         mbuf3 = (uint64_t *)*mbuf3;
1849                 } else {
1850                         /* Get pool pointer alone */
1851                         mbuf0 = (uint64_t *)*mbuf0;
1852                         mbuf1 = (uint64_t *)*mbuf1;
1853                         mbuf2 = (uint64_t *)*mbuf2;
1854                         mbuf3 = (uint64_t *)*mbuf3;
1855                 }
1856
1857                 const uint8x16_t shuf_mask2 = {
1858                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1859                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1860                 };
1861                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1862                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1863
1864                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1865                 const uint64x2_t and_mask0 = {
1866                         0xFFFFFFFFFFFFFFFF,
1867                         0x000000000000FFFF,
1868                 };
1869
1870                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1871                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1872                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1873                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1874
1875                 /*
1876                  * Pick only 16 bits of pktlen present at bits 63:32
1877                  * and place them at bits 15:0.
1878                  */
1879                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1880                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1881
1882                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1883                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1884                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1885
1886                 /* ORR both sgdesc_w0 and senddesc_w0 with 16 bits of
1887                  * pktlen at 15:0 position.
1888                  */
1889                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1890                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1891                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1892                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1893
1894                 /* Move mbuf to point to pool_id. */
1895                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1896                                      offsetof(struct rte_mempool, pool_id));
1897                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1898                                      offsetof(struct rte_mempool, pool_id));
1899                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1900                                      offsetof(struct rte_mempool, pool_id));
1901                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1902                                      offsetof(struct rte_mempool, pool_id));
1903
1904                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1905                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1906                         /*
1907                          * Lookup table to translate ol_flags to
1908                          * il3/il4 types. But we still use ol3/ol4 types in
1909                          * senddesc_w1 as only one level of header processing is enabled.
1910                          */
1911                         const uint8x16_t tbl = {
1912                                 /* [0-15] = il4type:il3type */
1913                                 0x04, /* none (IPv6 assumed) */
1914                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1915                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1916                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1917                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1918                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1919                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1920                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1921                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1922                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1923                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1924                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1925                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1926                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1927                                        * RTE_MBUF_F_TX_TCP_CKSUM
1928                                        */
1929                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1930                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1931                                        */
1932                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1933                                        * RTE_MBUF_F_TX_UDP_CKSUM
1934                                        */
1935                         };
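                        /* Worked example: RTE_MBUF_F_TX_IPV4 |
                         * RTE_MBUF_F_TX_UDP_CKSUM occupies ol_flags bits
                         * 55:52 = 0b1011, so the lookup below returns
                         * tbl[11] = 0x32, i.e. il4type UDP (3) and il3type
                         * IPv4 without checksum (2).
                         */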
1936
1937                         /* Extract olflags to translate to iltypes */
1938                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1939                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1940
1941                         /*
1942                          * E(47):L3_LEN(9):L2_LEN(7+z)
1943                          * E(47):L3_LEN(9):L2_LEN(7+z)
1944                          */
1945                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1946                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1947
1948                         /* Move OLFLAGS bits 55:52 to 51:48
1949                          * with zeros prepended on the byte; the rest
1950                          * are don't care.
1951                          */
1952                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1953                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1954                         /*
1955                          * E(48):L3_LEN(8):L2_LEN(z+7)
1956                          * E(48):L3_LEN(8):L2_LEN(z+7)
1957                          */
1958                         const int8x16_t tshft3 = {
1959                                 -1, 0, 8, 8, 8, 8, 8, 8,
1960                                 -1, 0, 8, 8, 8, 8, 8, 8,
1961                         };
1962
1963                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1964                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1965
1966                         /* Do the lookup */
1967                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1968                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1969
1970                         /* Pick only relevant fields i.e. bits 48:55 of iltype
1971                          * and place them in ol3/ol4type of senddesc_w1
1972                          */
1973                         const uint8x16_t shuf_mask0 = {
1974                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1975                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1976                         };
1977
1978                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1979                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1980
1981                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1982                          * a [E(32):E(16):OL3(8):OL2(8)]
1983                          * a = a + (a << 8)
1984                          * a [E(32):E(16):(OL3+OL2):OL2]
1985                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1986                          */
1987                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1988                                                  vshlq_n_u16(senddesc01_w1, 8));
1989                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1990                                                  vshlq_n_u16(senddesc23_w1, 8));
1991
1992                         /* Move ltypes to senddesc*_w1 */
1993                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1994                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1995                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1996                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1997                         /*
1998                          * Lookup table to translate ol_flags to
1999                          * ol3/ol4 types.
2000                          */
2001
2002                         const uint8x16_t tbl = {
2003                                 /* [0-15] = ol4type:ol3type */
2004                                 0x00, /* none */
2005                                 0x03, /* OUTER_IP_CKSUM */
2006                                 0x02, /* OUTER_IPV4 */
2007                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2008                                 0x04, /* OUTER_IPV6 */
2009                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2010                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2011                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2012                                        * OUTER_IP_CKSUM
2013                                        */
2014                                 0x00, /* OUTER_UDP_CKSUM */
2015                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
2016                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
2017                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
2018                                        * OUTER_IP_CKSUM
2019                                        */
2020                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
2021                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2022                                        * OUTER_IP_CKSUM
2023                                        */
2024                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2025                                        * OUTER_IPV4
2026                                        */
2027                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2028                                        * OUTER_IPV4 | OUTER_IP_CKSUM
2029                                        */
2030                         };
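                        /* Worked example: OUTER_UDP_CKSUM | OUTER_IP_CKSUM
                         * forms index 0b1001 = 9 once bit 41 is mirrored up
                         * to bit 61 below, so the lookup returns tbl[9] =
                         * 0x33, i.e. ol4type UDP checksum and ol3type IPv4
                         * with checksum.
                         */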
2031
2032                         /* Extract olflags to translate to iltypes */
2033                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2034                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2035
2036                         /*
2037                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
2038                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
2039                          */
2040                         const uint8x16_t shuf_mask5 = {
2041                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2042                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2043                         };
2044                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2045                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2046
2047                         /* Extract outer ol flags only */
2048                         const uint64x2_t o_cksum_mask = {
2049                                 0x1C00020000000000,
2050                                 0x1C00020000000000,
2051                         };
2052
2053                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
2054                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
2055
2056                         /* Extract OUTER_UDP_CKSUM bit 41 and
2057                          * move it to bit 61
2058                          */
2059
2060                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2061                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2062
2063                         /* Shift right oltype by 2 to start nibble from BIT(56)
2064                          * instead of BIT(58)
2065                          */
2066                         xtmp128 = vshrq_n_u8(xtmp128, 2);
2067                         ytmp128 = vshrq_n_u8(ytmp128, 2);
2068                         /*
2069                          * E(48):L3_LEN(8):L2_LEN(z+7)
2070                          * E(48):L3_LEN(8):L2_LEN(z+7)
2071                          */
2072                         const int8x16_t tshft3 = {
2073                                 -1, 0, 8, 8, 8, 8, 8, 8,
2074                                 -1, 0, 8, 8, 8, 8, 8, 8,
2075                         };
2076
2077                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2078                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2079
2080                         /* Do the lookup */
2081                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2082                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2083
2084                         /* Pick only relevant fields i.e. bits 56:63 of oltype
2085                          * and place them in ol3/ol4type of senddesc_w1
2086                          */
2087                         const uint8x16_t shuf_mask0 = {
2088                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2089                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2090                         };
2091
2092                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2093                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2094
2095                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
2096                          * a [E(32):E(16):OL3(8):OL2(8)]
2097                          * a = a + (a << 8)
2098                          * a [E(32):E(16):(OL3+OL2):OL2]
2099                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
2100                          */
2101                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2102                                                  vshlq_n_u16(senddesc01_w1, 8));
2103                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2104                                                  vshlq_n_u16(senddesc23_w1, 8));
2105
2106                         /* Move ltypes to senddesc*_w1 */
2107                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2108                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2109                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2110                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2111                         /* Lookup table to translate ol_flags to
2112                          * ol4type, ol3type, il4type, il3type of senddesc_w1
2113                          */
2114                         const uint8x16x2_t tbl = {{
2115                                 {
2116                                         /* [0-15] = il4type:il3type */
2117                                         0x04, /* none (IPv6) */
2118                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
2119                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
2120                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
2121                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
2122                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
2123                                                * RTE_MBUF_F_TX_TCP_CKSUM
2124                                                */
2125                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2126                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2127                                                */
2128                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2129                                                * RTE_MBUF_F_TX_UDP_CKSUM
2130                                                */
2131                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
2132                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
2133                                                * RTE_MBUF_F_TX_TCP_CKSUM
2134                                                */
2135                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
2136                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2137                                                */
2138                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
2139                                                * RTE_MBUF_F_TX_UDP_CKSUM
2140                                                */
2141                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
2142                                                * RTE_MBUF_F_TX_IP_CKSUM
2143                                                */
2144                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2145                                                * RTE_MBUF_F_TX_TCP_CKSUM
2146                                                */
2147                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2148                                                * RTE_MBUF_F_TX_SCTP_CKSUM
2149                                                */
2150                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2151                                                * RTE_MBUF_F_TX_UDP_CKSUM
2152                                                */
2153                                 },
2154
2155                                 {
2156                                         /* [16-31] = ol4type:ol3type */
2157                                         0x00, /* none */
2158                                         0x03, /* OUTER_IP_CKSUM */
2159                                         0x02, /* OUTER_IPV4 */
2160                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2161                                         0x04, /* OUTER_IPV6 */
2162                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2163                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2164                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2165                                                * OUTER_IP_CKSUM
2166                                                */
2167                                         0x00, /* OUTER_UDP_CKSUM */
2168                                         0x33, /* OUTER_UDP_CKSUM |
2169                                                * OUTER_IP_CKSUM
2170                                                */
2171                                         0x32, /* OUTER_UDP_CKSUM |
2172                                                * OUTER_IPV4
2173                                                */
2174                                         0x33, /* OUTER_UDP_CKSUM |
2175                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2176                                                */
2177                                         0x34, /* OUTER_UDP_CKSUM |
2178                                                * OUTER_IPV6
2179                                                */
2180                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2181                                                * OUTER_IP_CKSUM
2182                                                */
2183                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2184                                                * OUTER_IPV4
2185                                                */
2186                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2187                                                * OUTER_IPV4 | OUTER_IP_CKSUM
2188                                                */
2189                                 },
2190                         }};
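                        /* Note: vqtbl2q_u8 below treats this as a single
                         * 32-entry table; iltype indices remain in [0-15]
                         * while oltype indices get bit 4 set via
                         * oi_cksum_mask2 so they select from the [16-31]
                         * half.
                         */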
2191
2192                         /* Extract olflags to translate to oltype & iltype */
2193                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2194                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2195
2196                         /*
2197                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2198                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2199                          */
2200                         const uint32x4_t tshft_4 = {
2201                                 1,
2202                                 0,
2203                                 1,
2204                                 0,
2205                         };
2206                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2207                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2208
2209                         /*
2210                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2211                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2212                          */
2213                         const uint8x16_t shuf_mask5 = {
2214                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2215                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2216                         };
2217                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2218                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2219
2220                         /* Extract outer and inner header ol_flags */
2221                         const uint64x2_t oi_cksum_mask = {
2222                                 0x1CF0020000000000,
2223                                 0x1CF0020000000000,
2224                         };
2225
2226                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2227                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2228
2229                         /* Extract OUTER_UDP_CKSUM bit 41 and
2230                          * move it to bit 61
2231                          */
2232
2233                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2234                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2235
2236                         /* Shift right oltype by 2 and iltype by 4
2237                          * to start oltype nibble from BIT(56)
2238                          * instead of BIT(58) and iltype nibble from BIT(48)
2239                          * instead of BIT(52).
2240                          */
2241                         const int8x16_t tshft5 = {
2242                                 8, 8, 8, 8, 8, 8, -4, -2,
2243                                 8, 8, 8, 8, 8, 8, -4, -2,
2244                         };
2245
2246                         xtmp128 = vshlq_u8(xtmp128, tshft5);
2247                         ytmp128 = vshlq_u8(ytmp128, tshft5);
2248                         /*
2249                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2250                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2251                          */
2252                         const int8x16_t tshft3 = {
2253                                 -1, 0, -1, 0, 0, 0, 0, 0,
2254                                 -1, 0, -1, 0, 0, 0, 0, 0,
2255                         };
2256
2257                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2258                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2259
2260                         /* Mark Bit(4) of oltype */
2261                         const uint64x2_t oi_cksum_mask2 = {
2262                                 0x1000000000000000,
2263                                 0x1000000000000000,
2264                         };
2265
2266                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2267                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2268
2269                         /* Do the lookup */
2270                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2271                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2272
2273                         /* Pick only relevant fields i.e. bits 48:55 of iltype and
2274                          * bits 56:63 of oltype and place them in the
2275                          * corresponding places in senddesc_w1.
2276                          */
2277                         const uint8x16_t shuf_mask0 = {
2278                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2279                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2280                         };
2281
2282                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2283                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2284
2285                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2286                          * l3len, l2len, ol3len, ol2len.
2287                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2288                          * a = a + (a << 8)
2289                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2290                          * a = a + (a << 16)
2291                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2292                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2293                          */
2294                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
2295                                                  vshlq_n_u32(senddesc01_w1, 8));
2296                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
2297                                                  vshlq_n_u32(senddesc23_w1, 8));
2298
2299                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2300                         senddesc01_w1 = vaddq_u8(
2301                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2302                         senddesc23_w1 = vaddq_u8(
2303                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2304
2305                         /* Move ltypes to senddesc*_w1 */
2306                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2307                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2308                 }
2309
2310                 xmask01 = vdupq_n_u64(0);
2311                 xmask23 = xmask01;
2312                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2313                              : [a] "+w"(xmask01)
2314                              : [in] "r"(mbuf0)
2315                              : "memory");
2316
2317                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2318                              : [a] "+w"(xmask01)
2319                              : [in] "r"(mbuf1)
2320                              : "memory");
2321
2322                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2323                              : [b] "+w"(xmask23)
2324                              : [in] "r"(mbuf2)
2325                              : "memory");
2326
2327                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2328                              : [b] "+w"(xmask23)
2329                              : [in] "r"(mbuf3)
2330                              : "memory");
2331                 xmask01 = vshlq_n_u64(xmask01, 20);
2332                 xmask23 = vshlq_n_u64(xmask23, 20);
2333
2334                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2335                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
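                /* Note: the 16-bit value loaded into the lanes above is the
                 * mempool's aura/pool id; the shift by 20 places it in the
                 * aura field of SEND_HDR_W0 (position inferred from the
                 * shift).
                 */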
2336
2337                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2338                         /* Tx ol_flag for vlan. */
2339                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2340                         /* Bit enable for VLAN1 */
2341                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2342                         /* Tx ol_flag for QinQ. */
2343                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2344                         /* Bit enable for VLAN0 */
2345                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2346                         /* Load VLAN values from packet. Outer is VLAN 0. */
2347                         uint64x2_t ext01 = {
2348                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2349                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2350                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2351                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2352                         };
2353                         uint64x2_t ext23 = {
2354                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2355                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2356                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2357                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2358                         };
2359
2360                         /* Get ol_flags of the packets. */
2361                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2362                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2363
2364                         /* ORR vlan outer/inner values into cmd. */
2365                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2366                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2367
2368                         /* Test for offload enable bits and generate masks. */
2369                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2370                                                       mlv),
2371                                             vandq_u64(vtstq_u64(xtmp128, olq),
2372                                                       mlq));
2373                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2374                                                       mlv),
2375                                             vandq_u64(vtstq_u64(ytmp128, olq),
2376                                                       mlq));
2377
2378                         /* Set vlan enable bits into cmd based on mask. */
2379                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2380                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2381                 }
2382
2383                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2384                         /* Tx ol_flag for timestamp. */
2385                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2386                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
2387                         /* Set send mem alg to SUB. */
2388                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2389                         /* Increment send mem address by 8. */
2390                         const uint64x2_t addr = {0x8, 0x8};
2391
2392                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2393                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2394
2395                         /* Check if a timestamp is requested and generate an
2396                          * inverted mask, since the default cmd value needs no
2397                          * change for packets that request one.
2398                          */
2399                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2400                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2401
2402                         /* Change send mem address to an 8-byte offset when
2403                          * TSTAMP is disabled.
2404                          */
2405                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
2406                                                  vandq_u64(xtmp128, addr));
2407                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
2408                                                  vandq_u64(ytmp128, addr));
2409                         /* Change send mem alg to SUB when TSTAMP is disabled. */
2410                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
2411                                                  vandq_u64(xtmp128, alg));
2412                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
2413                                                  vandq_u64(ytmp128, alg));
2414
2415                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2416                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2417                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2418                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2419                 }
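                /*
                 * How the inverted mask works: vtstq_u64() yields all-ones
                 * lanes for packets that set RTE_MBUF_F_TX_IEEE1588_TMST,
                 * and vmvnq_u32() flips that. The +8 address bump and the
                 * ALG=SUB bit are therefore applied only to lanes with the
                 * flag clear, steering their SEND_MEM write to a spare
                 * doubleword (assuming ts_mem reserves one) so the real
                 * timestamp slot is written only for flagged packets.
                 */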
2420
2421                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2422                         const uint64_t lso_fmt = txq->lso_tun_fmt;
2423                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2424                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2425
2426                         /* Extract SD W1 as we need to set L4 types. */
2427                         vst1q_u64(sd_w1, senddesc01_w1);
2428                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
2429
2430                         /* Extract SX W0 as we need to set LSO fields. */
2431                         vst1q_u64(sx_w0, sendext01_w0);
2432                         vst1q_u64(sx_w0 + 2, sendext23_w0);
2433
2434                         /* Extract ol_flags. */
2435                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2436                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2437
2438                         /* Prepare individual mbufs. */
2439                         cn10k_nix_prepare_tso(tx_pkts[0],
2440                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
2441                                 (union nix_send_ext_w0_u *)&sx_w0[0],
2442                                 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2443
2444                         cn10k_nix_prepare_tso(tx_pkts[1],
2445                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
2446                                 (union nix_send_ext_w0_u *)&sx_w0[1],
2447                                 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2448
2449                         cn10k_nix_prepare_tso(tx_pkts[2],
2450                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
2451                                 (union nix_send_ext_w0_u *)&sx_w0[2],
2452                                 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2453
2454                         cn10k_nix_prepare_tso(tx_pkts[3],
2455                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
2456                                 (union nix_send_ext_w0_u *)&sx_w0[3],
2457                                 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2458
2459                         senddesc01_w1 = vld1q_u64(sd_w1);
2460                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2461
2462                         sendext01_w0 = vld1q_u64(sx_w0);
2463                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
2464                 }
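                /*
                 * cn10k_nix_prepare_tso() patches one header at a time via
                 * the scalar union views, so the vectors are spilled to
                 * sd_w1[]/sx_w0[], fixed up per mbuf and reloaded rather
                 * than editing the L4 type and LSO fields lane-wise.
                 */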
2465
2466                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2467                     !(flags & NIX_TX_MULTI_SEG_F) &&
2468                     !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2469                         /* Set don't free bit if reference count > 1 */
2470                         xmask01 = vdupq_n_u64(0);
2471                         xmask23 = xmask01;
2472
2473                         /* Move mbufs to iova */
2474                         mbuf0 = (uint64_t *)tx_pkts[0];
2475                         mbuf1 = (uint64_t *)tx_pkts[1];
2476                         mbuf2 = (uint64_t *)tx_pkts[2];
2477                         mbuf3 = (uint64_t *)tx_pkts[3];
2478
2479                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2480                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2481                         else
2482                                 RTE_MEMPOOL_CHECK_COOKIES(
2483                                         ((struct rte_mbuf *)mbuf0)->pool,
2484                                         (void **)&mbuf0, 1, 0);
2485
2486                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2487                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2488                         else
2489                                 RTE_MEMPOOL_CHECK_COOKIES(
2490                                         ((struct rte_mbuf *)mbuf1)->pool,
2491                                         (void **)&mbuf1, 1, 0);
2492
2493                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2494                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2495                         else
2496                                 RTE_MEMPOOL_CHECK_COOKIES(
2497                                         ((struct rte_mbuf *)mbuf2)->pool,
2498                                         (void **)&mbuf2, 1, 0);
2499
2500                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2501                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2502                         else
2503                                 RTE_MEMPOOL_CHECK_COOKIES(
2504                                         ((struct rte_mbuf *)mbuf3)->pool,
2505                                         (void **)&mbuf3, 1, 0);
2506                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2507                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2508                 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2509                            !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2510                         /* Move mbufs to iova */
2511                         mbuf0 = (uint64_t *)tx_pkts[0];
2512                         mbuf1 = (uint64_t *)tx_pkts[1];
2513                         mbuf2 = (uint64_t *)tx_pkts[2];
2514                         mbuf3 = (uint64_t *)tx_pkts[3];
2515
2516                         /* Mark mempool object as "put" since
2517                          * it is freed by NIX
2518                          */
2519                         RTE_MEMPOOL_CHECK_COOKIES(
2520                                 ((struct rte_mbuf *)mbuf0)->pool,
2521                                 (void **)&mbuf0, 1, 0);
2522
2523                         RTE_MEMPOOL_CHECK_COOKIES(
2524                                 ((struct rte_mbuf *)mbuf1)->pool,
2525                                 (void **)&mbuf1, 1, 0);
2526
2527                         RTE_MEMPOOL_CHECK_COOKIES(
2528                                 ((struct rte_mbuf *)mbuf2)->pool,
2529                                 (void **)&mbuf2, 1, 0);
2530
2531                         RTE_MEMPOOL_CHECK_COOKIES(
2532                                 ((struct rte_mbuf *)mbuf3)->pool,
2533                                 (void **)&mbuf3, 1, 0);
2534                 }
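                /*
                 * Either way the NIX owns buffer release from here. When
                 * cnxk_nix_prefree_seg() returns non-zero the mbuf must not
                 * be freed by HW (e.g. refcnt > 1), so 0x80000, read here
                 * as the don't-free bit (bit 19) of SEND_HDR_W0, is set;
                 * otherwise the mempool debug cookie is marked "put" since
                 * the NIX frees the buffer after transmit. Note the lane
                 * writes above must be assigned back, as vsetq_lane_u64()
                 * returns the updated vector rather than modifying it.
                 */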
2535
2536                 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
2537                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2538                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2539                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2540                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2541
2542                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2543                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2544                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2545                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2546
2547                 if (flags & NIX_TX_NEED_EXT_HDR) {
2548                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2549                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2550                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2551                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2552                 }
2553
2554                 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2555                         const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2556                                                 RTE_MBUF_F_TX_SEC_OFFLOAD};
2557                         uintptr_t next;
2558                         uint8_t dw;
2559
2560                         /* Extract ol_flags. */
2561                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2562                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2563
2564                         xtmp128 = vtstq_u64(olf, xtmp128);
2565                         ytmp128 = vtstq_u64(olf, ytmp128);
2566
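                        /*
                         * Each all-ones lane below routes that mbuf through
                         * the CPT path: cn10k_nix_prep_sec_vec() rewrites
                         * the cmd words and reserves CPT LMT space, while
                         * non-security packets only advance the NIX LMT
                         * cursor via cn10k_nix_lmt_next().
                         */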
2567                         /* Process mbuf0 */
2568                         dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2569                         if (vgetq_lane_u64(xtmp128, 0))
2570                                 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2571                                                        &cmd1[0], &next, c_laddr,
2572                                                        &c_lnum, &c_loff,
2573                                                        &c_shft, sa_base, flags);
2574                         else
2575                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2576                                                    &shift, &wd.data128, &next);
2577
2578                         /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2579                         cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2580                                              cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2581                                              flags);
2582
2583                         /* Process mbuf1 */
2584                         dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2585                         if (vgetq_lane_u64(xtmp128, 1))
2586                                 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2587                                                        &cmd1[1], &next, c_laddr,
2588                                                        &c_lnum, &c_loff,
2589                                                        &c_shft, sa_base, flags);
2590                         else
2591                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2592                                                    &shift, &wd.data128, &next);
2593
2594                         /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2595                         cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2596                                              cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2597                                              flags);
2598
2599                         /* Process mbuf2 */
2600                         dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2601                         if (vgetq_lane_u64(ytmp128, 0))
2602                                 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2603                                                        &cmd1[2], &next, c_laddr,
2604                                                        &c_lnum, &c_loff,
2605                                                        &c_shft, sa_base, flags);
2606                         else
2607                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2608                                                    &shift, &wd.data128, &next);
2609
2610                         /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2611                         cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2612                                              cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2613                                              flags);
2614
2615                         /* Process mbuf3 */
2616                         dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2617                         if (vgetq_lane_u64(ytmp128, 1))
2618                                 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2619                                                        &cmd1[3], &next, c_laddr,
2620                                                        &c_lnum, &c_loff,
2621                                                        &c_shft, sa_base, flags);
2622                         else
2623                                 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2624                                                    &shift, &wd.data128, &next);
2625
2626                         /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2627                         cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2628                                              cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2629                                              flags);
2630
2631                 } else if (flags & NIX_TX_MULTI_SEG_F) {
2632                         uint8_t j;
2633
2634                         segdw[4] = 8;
2635                         j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2636                                                           cmd2, cmd3, segdw,
2637                                                           (uint64_t *)
2638                                                           LMT_OFF(laddr, lnum,
2639                                                                   0),
2640                                                           &wd.data128, &shift,
2641                                                           flags);
2642                         lnum += j;
2643                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2644                         /* Store the prepared send desc to LMT lines */
2645                         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2646                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2647                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2648                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2649                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2650                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2651                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2652                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2653                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2654                                 lnum += 1;
2655                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2656                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2657                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2658                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2659                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2660                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2661                                 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2662                                 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2663                         } else {
2664                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2665                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2666                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2667                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2668                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2669                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2670                                 lnum += 1;
2671                                 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2672                                 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2673                                 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2674                                 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2675                                 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2676                                 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2677                         }
2678                         lnum += 1;
2679                 } else {
2680                         /* Store the prepared send desc to LMT lines */
2681                         vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2682                         vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2683                         vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2684                         vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2685                         vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2686                         vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2687                         vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2688                         vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2689                         lnum += 1;
2690                 }
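                /*
                 * LMT line accounting for the stores above, per 4 packets:
                 * with ext hdr + tstamp each cmd is 8 dwords (64B), so two
                 * packets share a 128B line and two lines are consumed;
                 * with ext hdr only each cmd is 6 dwords (48B), still two
                 * packets per line; without ext hdr all four 4-dword cmds
                 * fit in a single line.
                 */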
2691
2692                 if (flags & NIX_TX_MULTI_SEG_F) {
2693                         tx_pkts[0]->next = NULL;
2694                         tx_pkts[1]->next = NULL;
2695                         tx_pkts[2]->next = NULL;
2696                         tx_pkts[3]->next = NULL;
2697                 }
2698
2699                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2700         }
2701
2702         /* Round lnum up to count the last line if it is partially filled */
2703         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2704                 lnum = lnum + !!loff;
2705                 wd.data128 = wd.data128 |
2706                         (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2707         }
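        /*
         * Reading of the arithmetic above (inferred, not from a spec):
         * loff counts bytes used in the trailing partial CPT line, so
         * ((loff >> 4) - 1) & 0x7 is its size in 16B units minus one,
         * merged into the per-line size field at the current shift.
         */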
2708
2709         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2710                 wd.data[0] >>= 16;
2711
2712         if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
2713                 ws[1] = roc_sso_hws_head_wait(ws[0]);
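        /*
         * The VWQE check above delays Tx until this HWS is at the head of
         * its scheduling context; bit 35 of ws[1] appears to cache the
         * head status (an inference from this check), so the wait is
         * skipped once it is set.
         */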
2714
2715         left -= burst;
2716
2717         /* Submit CPT instructions if any */
2718         if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2719                 cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
2720                 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2721                                      c_shft);
2722         }
2723
2724         /* Trigger LMTST */
2725         if (lnum > 16) {
2726                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2727                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2728
2729                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2730                 wd.data[0] &= ~0x7ULL;
2731
2732                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2733                         wd.data[0] <<= 16;
2734
2735                 wd.data[0] |= (15ULL << 12);
2736                 wd.data[0] |= (uint64_t)lmt_id;
2737
2738                 /* STEOR0 */
2739                 roc_lmt_submit_steorl(wd.data[0], pa);
2740
2741                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2742                         wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2743
2744                 pa = io_addr | (wd.data[1] & 0x7) << 4;
2745                 wd.data[1] &= ~0x7ULL;
2746
2747                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2748                         wd.data[1] <<= 16;
2749
2750                 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2751                 wd.data[1] |= (uint64_t)(lmt_id + 16);
2752
2753                 /* STEOR1 */
2754                 roc_lmt_submit_steorl(wd.data[1], pa);
2755         } else if (lnum) {
2756                 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2757                         wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2758
2759                 pa = io_addr | (wd.data[0] & 0x7) << 4;
2760                 wd.data[0] &= ~0x7ULL;
2761
2762                 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2763                         wd.data[0] <<= 16;
2764
2765                 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2766                 wd.data[0] |= lmt_id;
2767
2768                 /* STEOR0 */
2769                 roc_lmt_submit_steorl(wd.data[0], pa);
2770         }
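        /*
         * Worked example of the split above: for lnum == 20, STEOR0
         * submits 16 lines starting at lmt_id with the count-minus-one
         * field 15 << 12, and STEOR1 submits the remaining 4 lines from
         * lmt_id + 16 with (20 - 17) << 12. Bits [2:0] of wd.data pick
         * the I/O address offset (pa bits [6:4]) and are cleared before
         * the submit.
         */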
2771
2772         rte_io_wmb();
2773         if (left)
2774                 goto again;
2775
2776         if (unlikely(scalar)) {
2777                 if (flags & NIX_TX_MULTI_SEG_F)
2778                         pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
2779                                                          scalar, cmd, flags);
2780                 else
2781                         pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
2782                                                     scalar, cmd, flags);
2783         }
2784
2785         return pkts;
2786 }
2787
2788 #else
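/* Stub for builds without the NEON vector path: it transmits nothing and
 * returns 0, so callers are expected to use the scalar routines instead.
 */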
2789 static __rte_always_inline uint16_t
2790 cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
2791                            struct rte_mbuf **tx_pkts, uint16_t pkts,
2792                            uint64_t *cmd, const uint16_t flags)
2793 {
2794         RTE_SET_USED(ws);
2795         RTE_SET_USED(tx_queue);
2796         RTE_SET_USED(tx_pkts);
2797         RTE_SET_USED(pkts);
2798         RTE_SET_USED(cmd);
2799         RTE_SET_USED(flags);
2800         return 0;
2801 }
2802 #endif
2803
2804 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
2805 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2806 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
2807 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
2808 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
2809 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
2810 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
2811
2812 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
2813 #define NIX_TX_FASTPATH_MODES_0_15                                             \
2814         T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
2815         T(l3l4csum, 6, L3L4CSUM_F)                                             \
2816         T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
2817         T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
2818         T(vlan, 6, VLAN_F)                                                     \
2819         T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
2820         T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
2821         T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2822         T(noff, 6, NOFF_F)                                                     \
2823         T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
2824         T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
2825         T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2826         T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
2827         T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
2828         T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
2829         T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
2830           NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2831
2832 #define NIX_TX_FASTPATH_MODES_16_31                                            \
2833         T(tso, 6, TSO_F)                                                       \
2834         T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
2835         T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
2836         T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
2837         T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
2838         T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
2839         T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
2840         T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2841           TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2842         T(tso_noff, 6, TSO_F | NOFF_F)                                         \
2843         T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
2844         T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
2845         T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
2846           TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2847         T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
2848         T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
2849         T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2850         T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2851           TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2852
2853 #define NIX_TX_FASTPATH_MODES_32_47                                            \
2854         T(ts, 8, TSP_F)                                                        \
2855         T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
2856         T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
2857         T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2858         T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
2859         T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
2860         T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
2861         T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
2862           TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2863         T(ts_noff, 8, TSP_F | NOFF_F)                                          \
2864         T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
2865         T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
2866         T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
2867           TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2868         T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
2869         T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
2870         T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
2871         T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
2872           TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2873
2874 #define NIX_TX_FASTPATH_MODES_48_63                                            \
2875         T(ts_tso, 8, TSP_F | TSO_F)                                            \
2876         T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
2877         T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
2878         T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
2879           TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
2880         T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
2881         T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
2882         T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
2883         T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2884           TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2885         T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
2886         T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
2887         T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
2888         T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
2889           TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2890         T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
2891         T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
2892           TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
2893         T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
2894           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
2895         T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2896           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2897
2898 #define NIX_TX_FASTPATH_MODES_64_79                                            \
2899         T(sec, 6, T_SEC_F)                                                     \
2900         T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
2901         T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
2902         T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2903         T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
2904         T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
2905         T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
2906         T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2907           T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2908         T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
2909         T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
2910         T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
2911         T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
2912           T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2913         T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
2914         T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
2915         T(sec_noff_vlan_ol3ol4csum, 6,                                         \
2916           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
2917         T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2918           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2919
2920 #define NIX_TX_FASTPATH_MODES_80_95                                            \
2921         T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
2922         T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
2923         T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
2924         T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
2925           T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2926         T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
2927         T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
2928         T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2929         T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
2930           T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2931         T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
2932         T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
2933         T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2934         T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
2935           T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2936         T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
2937         T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
2938           T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2939         T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
2940           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2941         T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
2942           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2943
2944 #define NIX_TX_FASTPATH_MODES_96_111                                           \
2945         T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2946         T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2947         T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2948         T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2949           T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2950         T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2951         T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2952         T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2953         T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2954           T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2955         T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2956         T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2957         T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2958         T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2959           T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2960         T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2961         T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2962           T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2963         T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2964           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2965         T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2966           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2967
2968 #define NIX_TX_FASTPATH_MODES_112_127                                          \
2969         T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2970         T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2971         T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2972         T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2973           T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2974         T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2975         T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2976           T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2977         T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2978           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2979         T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2980           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2981         T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2982         T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2983           T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2984         T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2985           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2986         T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2987           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2988         T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2989         T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2990           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2991         T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2992           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2993         T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2994           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2995                   L3L4CSUM_F)
2996
2997 #define NIX_TX_FASTPATH_MODES                                                  \
2998         NIX_TX_FASTPATH_MODES_0_15                                             \
2999         NIX_TX_FASTPATH_MODES_16_31                                            \
3000         NIX_TX_FASTPATH_MODES_32_47                                            \
3001         NIX_TX_FASTPATH_MODES_48_63                                            \
3002         NIX_TX_FASTPATH_MODES_64_79                                            \
3003         NIX_TX_FASTPATH_MODES_80_95                                            \
3004         NIX_TX_FASTPATH_MODES_96_111                                           \
3005         NIX_TX_FASTPATH_MODES_112_127
3006
3007 #define T(name, sz, flags)                                                     \
3008         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
3009                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
3010         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
3011                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
3012         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
3013                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
3014         uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
3015                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
3016
3017 NIX_TX_FASTPATH_MODES
3018 #undef T
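/*
 * Illustrative expansion (abridged): the entry T(vlan, 6, VLAN_F) above
 * declares, via the T() definition just removed,
 *
 *   uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vlan(
 *           void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
 *
 * together with the _mseg, _vec and _vec_mseg variants of the same mode.
 */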
3019
3020 #define NIX_TX_XMIT(fn, sz, flags)                                             \
3021         uint16_t __rte_noinline __rte_hot fn(                                  \
3022                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3023         {                                                                      \
3024                 uint64_t cmd[sz];                                              \
3025                 /* For TSO, inner checksum is a must */                        \
3026                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3027                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3028                         return 0;                                              \
3029                 return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
3030                                            flags);                             \
3031         }
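/*
 * Sketch of the intended instantiation pattern (the per-mode .c files are
 * assumed to pair these wrappers with the T() tables above):
 *
 *   #define T(name, sz, flags) \
 *           NIX_TX_XMIT(cn10k_nix_xmit_pkts_##name, sz, flags)
 *   NIX_TX_FASTPATH_MODES_0_15
 *   #undef T
 *
 * which emits one __rte_hot definition per declared mode.
 */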
3032
3033 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
3034         uint16_t __rte_noinline __rte_hot fn(                                  \
3035                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3036         {                                                                      \
3037                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
3038                 /* For TSO, inner checksum is a must */                        \
3039                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3040                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3041                         return 0;                                              \
3042                 return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
3043                                                 cmd,                           \
3044                                                 flags | NIX_TX_MULTI_SEG_F);   \
3045         }
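/*
 * Sizing note, inferred from the expressions rather than a spec: the mseg
 * wrappers reserve cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2] because the
 * base sz dwords already cover one SG pair; only the extra scatter-gather
 * dwords for a full chain are added on top.
 */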
3046
3047 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
3048         uint16_t __rte_noinline __rte_hot fn(                                  \
3049                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3050         {                                                                      \
3051                 uint64_t cmd[sz];                                              \
3052                 /* For TSO, inner checksum is a must */                        \
3053                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3054                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3055                         return 0;                                              \
3056                 return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
3057                                                   pkts, cmd, (flags));         \
3058         }
3059
3060 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
3061         uint16_t __rte_noinline __rte_hot fn(                                  \
3062                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3063         {                                                                      \
3064                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
3065                 /* For TSO, inner checksum is a must */                        \
3066                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3067                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3068                         return 0;                                              \
3069                 return cn10k_nix_xmit_pkts_vector(                             \
3070                         tx_queue, NULL, tx_pkts, pkts, cmd,                    \
3071                         (flags) | NIX_TX_MULTI_SEG_F);                         \
3072         }
3073
3074 #endif /* __CN10K_TX_H__ */