net/cnxk: support pending Tx mbuf cleanup

drivers/net/cnxk/cn10k_tx.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)

/* Flag to control xmit_prepare behavior.
 * Defined from the far end of the flag space to denote that it is
 * not used as an offload flag when picking a Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
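
/* Bail out early when the SQ lacks room for 'pkts'. fc_cache_pkts is a
 * software-cached estimate of free packet slots; it is refreshed from
 * *fc_mem (the in-use SQB count) only when the cached value runs low,
 * avoiding a shared-memory read on every burst.
 */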
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update the fc_cache_pkts */            \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply with SQEs per SQB to express in pkts */    \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)

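/* Address of byte 'offset' within LMT line 'lmt_num' of this core's
 * LMT region; each line is (1 << ROC_LMT_LINE_SIZE_LOG2) bytes wide.
 */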
#define LMT_OFF(lmt_addr, lmt_num, offset)                                     \
        (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))

/* Function to determine the number of Tx subdescriptors required when
 * the extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags &
                (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ? 1 : 0;
}

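/* Maximum packets prepared per burst in the vector path: four packets
 * fit in one LMT line (32B of descriptors each), times the number of
 * LMT lines available per core.
 */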
static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        RTE_SET_USED(flags);
        /* We can pack up to 4 packets per LMTLINE if there are no offloads. */
        return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
}

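/* Build the LMTST dispatch data word for the scalar path. Each LMT
 * line carries one packet of (dw_m1 + 1) 16B units: SEND_HDR plus
 * optional SEND_EXT plus SG. The 3-bit size for line 0 sits in bits
 * 2:0 and is later moved into the I/O address; sizes for lines 1..15
 * occupy bits 21:19 through 63:61. The caller ORs in the line count
 * and LMT ID (see cn10k_nix_xmit_pkts()).
 */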
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 3-bit sizes for LMT lines 1..15 */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

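/* Vector path equivalent: every LMT line is fully packed with four
 * packets of two 16B descriptors each (SEND_HDR + SG), so each
 * per-line size is the full eight 16B units, encoded as 0x7.
 */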
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = 0x7;
        uint64_t data;

        RTE_SET_USED(flags);
        /* This will be moved to addr area */
        data = dw_m1;
        /* 3-bit sizes for LMT lines 1..15 */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

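/* Copy the per-queue constant parts of the send descriptor (SEND_HDR
 * W0, optional SEND_EXT, SG W0) into 'cmd'; per-packet fields are
 * filled in later by cn10k_nix_xmit_prepare().
 */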
static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags)
{
        /* Send hdr */
        cmd[0] = txq->send_hdr_w0;
        cmd[1] = 0;
        cmd += 2;

        /* Send ext if present */
        if (flags & NIX_TX_NEED_EXT_HDR) {
                *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
                cmd += 2;
        }

        /* Send sg */
        cmd[0] = txq->sg_w0;
        cmd[1] = 0;
}

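/* For TSO, pre-adjust length fields inside the packet headers: the
 * outer IP, outer UDP and inner IP lengths are reduced by the TCP
 * payload length so that the LSO engine only has to account for the
 * per-segment payload when it replicates the headers.
 */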
static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & PKT_TX_IPV6)));
                /* Handle tunnel TSO */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       PKT_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update outer UDP header length for UDP tunnel */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & PKT_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}

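/* Fill one packet's descriptor in 'cmd' (checksum types and pointers,
 * VLAN insertion, LSO setup) and flush it to the LMT line at
 * 'lmt_addr'. The 'flags' tests are compile-time constants, so each
 * Tx function variant compiles down to a specialized body.
 */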
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
                       const uint16_t flags, const uint64_t lso_tun_fmt)
{
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w1.u = 0;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                send_hdr->w0.total = m->data_len;
                send_hdr->w0.aura =
                        roc_npa_aura_handle_to_aura(m->pool->pool_id);
        }

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment it by 1 if it is IPv4, as 3 is IPv4 with csum */
                w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

                /* In case of no tunnel header, shift the IL3/IL4
                 * fields so that OL3/OL4 are used for the header
                 * checksum instead
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment it by 1 if it is IPv4, as 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment it by 1 if it is IPv4, as 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel TSO */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
        }

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be on the stack. The intent
         * is that 'cmd' stores content from txq->cmd which is copied only
         * once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

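/* Build the scatter-gather list for a multi-segment mbuf chain
 * directly on the LMT line. Each SG subdescriptor addresses up to
 * three segments; a fresh subdescriptor is started in-line once it is
 * full. Returns the total descriptor size in 16B units, as programmed
 * into SEND_HDR sizem1.
 */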
static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[3 + off];

        i = 0;
        nb_segs = m->nb_segs;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set the invert-DF bit if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Round up extra dwords to a multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1;
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}

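/* Scalar Tx burst. Packets are prepared 32 at a time, one per LMT
 * line, and dispatched with up to two STEORL operations since a
 * single STEORL can release at most 16 LMT lines.
 */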
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                    uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uintptr_t pa, lmt_addr = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uint64_t lso_tun_fmt;
        uint64_t data;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at
                 * LMT steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
                                       lso_tun_fmt);
                lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        left -= burst;
        rte_io_wmb();
        if (left) {
                /* Start processing another burst */
                tx_pkts += burst;
                /* Reset lmt base addr */
                lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
                lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
                goto again;
        }

        return pkts;
}

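/* Multi-segment variant of the scalar Tx burst. Descriptor sizes now
 * vary per packet, so per-line sizes are accumulated in 'data128'
 * (3 bits per line, starting at bit 16) and then split across the two
 * STEORL data words.
 */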
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                         uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint64_t data0, data1;
        uint64_t lso_tun_fmt;
        __uint128_t data128;
        uint16_t shft;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        cn10k_nix_tx_skeleton(txq, cmd, flags);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;
        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at
                 * LMT steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
                                       lso_tun_fmt);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
                                               flags);
                lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
                data128 |= (((__uint128_t)(segdw - 1)) << shft);
                shft += 3;
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        left -= burst;
        rte_io_wmb();
        if (left) {
                /* Start processing another burst */
                tx_pkts += burst;
                /* Reset lmt base addr */
                lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
                lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
                goto again;
        }

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

#define NIX_DESCS_PER_LOOP 4
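/* NEON Tx burst: four single-segment packets are processed per loop
 * iteration, with SEND_HDR W0/W1 and SG W0/W1 for packet pairs held in
 * 128-bit registers and ol_flags translated to checksum types through
 * table lookups (vqtbl).
 */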
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
        uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
        uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
        uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
        uint64x2_t senddesc01_w0, senddesc23_w0;
        uint64x2_t senddesc01_w1, senddesc23_w1;
        uint16_t left, scalar, burst, i, lmt_id;
        uint64x2_t sgdesc01_w0, sgdesc23_w0;
        uint64x2_t sgdesc01_w1, sgdesc23_w1;
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t laddr = txq->lmt_base;
        rte_iova_t io_addr = txq->io_addr;
        uint64x2_t ltypes01, ltypes23;
        uint64x2_t xtmp128, ytmp128;
        uint64x2_t xmask01, xmask23;
        uint8_t lnum;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
        pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
        senddesc23_w0 = senddesc01_w0;
        senddesc01_w1 = vdupq_n_u64(0);
        senddesc23_w1 = senddesc01_w1;
        sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
        sgdesc23_w0 = sgdesc01_w0;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(laddr, lmt_id);
        left = pkts;
again:
        /* Number of packets to prepare depends on offloads enabled. */
        burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
                              cn10k_nix_pkts_per_vec_brst(flags) :
                              left;
        lnum = 0;
        for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
                /* Clear lower 32 bits of SEND_HDR_W0 and SEND_SG_W0 */
                senddesc01_w0 =
                        vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
                sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

                senddesc23_w0 = senddesc01_w0;
                sgdesc23_w0 = sgdesc01_w0;

                /* Move mbufs to iova */
                mbuf0 = (uint64_t *)tx_pkts[0];
                mbuf1 = (uint64_t *)tx_pkts[1];
                mbuf2 = (uint64_t *)tx_pkts[2];
                mbuf3 = (uint64_t *)tx_pkts[3];

                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, buf_iova));
                /*
                 * Get mbuf's, olflags, iova, pktlen, dataoff
                 * dataoff_iovaX.D[0] = iova,
                 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
                 * len_olflagsX.D[0] = ol_flags,
                 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
                 */
                dataoff_iova0 = vld1q_u64(mbuf0);
                len_olflags0 = vld1q_u64(mbuf0 + 2);
                dataoff_iova1 = vld1q_u64(mbuf1);
                len_olflags1 = vld1q_u64(mbuf1 + 2);
                dataoff_iova2 = vld1q_u64(mbuf2);
                len_olflags2 = vld1q_u64(mbuf2 + 2);
                dataoff_iova3 = vld1q_u64(mbuf3);
                len_olflags3 = vld1q_u64(mbuf3 + 2);

                /* Move mbufs to point to pool */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));

                if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
                             NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                        /* Get tx_offload for ol2, ol3, l2, l3 lengths */
                        /*
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         */

                        asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf0 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf1 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf2 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf3 + 2)
                                     : "memory");

                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                } else {
                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                }

                const uint8x16_t shuf_mask2 = {
                        0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                        0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                };
                xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
                ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

                /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
                const uint64x2_t and_mask0 = {
                        0xFFFFFFFFFFFFFFFF,
                        0x000000000000FFFF,
                };

                dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
                dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
                dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
                dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

                /*
                 * Pick only 16 bits of pktlen present at bits 63:32
                 * and place them at bits 15:0.
                 */
                xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
                ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

                /* Add pairwise to get dataoff + iova in sgdesc_w1 */
                sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
                sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

                /* OR both sgdesc_w0 and senddesc_w0 with 16 bits of
                 * pktlen at bit position 15:0.
                 */
                sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
                sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
                senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
                senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

                /* Move mbuf to point to pool_id. */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mempool, pool_id));

                if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
                        /*
                         * Lookup table to translate ol_flags to
                         * il3/il4 types. But we still use ol3/ol4 types in
                         * senddesc_w1 as only one header processing is enabled.
                         */
                        const uint8x16_t tbl = {
                                /* [0-15] = il4type:il3type */
                                0x04, /* none (IPv6 assumed) */
                                0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
                                0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
                                0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
                                0x03, /* PKT_TX_IP_CKSUM */
                                0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
                                0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
                                0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
                                0x02, /* PKT_TX_IPV4  */
                                0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
                                0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
                                0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
                                0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
                                0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                                       * PKT_TX_TCP_CKSUM
                                       */
                                0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                                       * PKT_TX_SCTP_CKSUM
                                       */
                                0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                                       * PKT_TX_UDP_CKSUM
                                       */
                        };

                        /* Extract olflags to translate to iltypes */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        /*
                         * E(47):L3_LEN(9):L2_LEN(7+z)
                         * E(47):L3_LEN(9):L2_LEN(7+z)
                         */
                        senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
                        senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

                        /* Move OLFLAGS bits 55:52 to 51:48
                         * with zeros prepended on the byte and rest
                         * don't care
                         */
                        xtmp128 = vshrq_n_u8(xtmp128, 4);
                        ytmp128 = vshrq_n_u8(ytmp128, 4);
                        /*
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         */
                        const int8x16_t tshft3 = {
                                -1, 0, 8, 8, 8, 8, 8, 8,
                                -1, 0, 8, 8, 8, 8, 8, 8,
                        };

                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

                        /* Do the lookup */
                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);

                        /* Pick only relevant fields i.e. bits 48:55 of iltype
                         * and place it in ol3/ol4type of senddesc_w1
                         */
                        const uint8x16_t shuf_mask0 = {
                                0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
                                0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
                        };

                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
                         * a [E(32):E(16):OL3(8):OL2(8)]
                         * a = a + (a << 8)
                         * a [E(32):E(16):(OL3+OL2):OL2]
                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
                         */
                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                                 vshlq_n_u16(senddesc01_w1, 8));
                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                                 vshlq_n_u16(senddesc23_w1, 8));

                        /* Move ltypes to senddesc*_w1 */
                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
                } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                           (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
                        /*
                         * Lookup table to translate ol_flags to
                         * ol3/ol4 types.
                         */

                        const uint8x16_t tbl = {
                                /* [0-15] = ol4type:ol3type */
                                0x00, /* none */
                                0x03, /* OUTER_IP_CKSUM */
                                0x02, /* OUTER_IPV4 */
                                0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
                                0x04, /* OUTER_IPV6 */
                                0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
                                0x00, /* OUTER_IPV6 | OUTER_IPV4 */
                                0x00, /* OUTER_IPV6 | OUTER_IPV4 |
                                       * OUTER_IP_CKSUM
                                       */
                                0x00, /* OUTER_UDP_CKSUM */
                                0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
                                0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
                                0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
                                       * OUTER_IP_CKSUM
                                       */
                                0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                                       * OUTER_IP_CKSUM
                                       */
                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                                       * OUTER_IPV4
                                       */
                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                                       * OUTER_IPV4 | OUTER_IP_CKSUM
                                       */
                        };

                        /* Extract olflags to translate to oltypes */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        /*
                         * E(47):OL3_LEN(9):OL2_LEN(7+z)
                         * E(47):OL3_LEN(9):OL2_LEN(7+z)
                         */
                        const uint8x16_t shuf_mask5 = {
                                0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                                0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                        };
                        senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
                        senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

                        /* Extract outer ol flags only */
                        const uint64x2_t o_cksum_mask = {
                                0x1C00020000000000,
                                0x1C00020000000000,
                        };

                        xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
                        ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

                        /* Extract OUTER_UDP_CKSUM bit 41 and
                         * move it to bit 61
                         */

                        xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
                        ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

                        /* Shift oltype by 2 to start nibble from BIT(56)
                         * instead of BIT(58)
                         */
                        xtmp128 = vshrq_n_u8(xtmp128, 2);
                        ytmp128 = vshrq_n_u8(ytmp128, 2);
                        /*
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         */
                        const int8x16_t tshft3 = {
                                -1, 0, 8, 8, 8, 8, 8, 8,
                                -1, 0, 8, 8, 8, 8, 8, 8,
                        };

                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

                        /* Do the lookup */
                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);

                        /* Pick only relevant fields i.e. bits 56:63 of oltype
                         * and place it in ol3/ol4type of senddesc_w1
                         */
                        const uint8x16_t shuf_mask0 = {
                                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
                                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
                        };

                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
                         * a [E(32):E(16):OL3(8):OL2(8)]
                         * a = a + (a << 8)
                         * a [E(32):E(16):(OL3+OL2):OL2]
                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
                         */
                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                                 vshlq_n_u16(senddesc01_w1, 8));
                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                                 vshlq_n_u16(senddesc23_w1, 8));

                        /* Move ltypes to senddesc*_w1 */
                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1028                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1029                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1030                         /* Lookup table to translate ol_flags to
1031                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1032                          */
1033                         const uint8x16x2_t tbl = {{
1034                                 {
1035                                         /* [0-15] = il4type:il3type */
1036                                         0x04, /* none (IPv6) */
1037                                         0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1038                                         0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1039                                         0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1040                                         0x03, /* PKT_TX_IP_CKSUM */
1041                                         0x13, /* PKT_TX_IP_CKSUM |
1042                                                * PKT_TX_TCP_CKSUM
1043                                                */
1044                                         0x23, /* PKT_TX_IP_CKSUM |
1045                                                * PKT_TX_SCTP_CKSUM
1046                                                */
1047                                         0x33, /* PKT_TX_IP_CKSUM |
1048                                                * PKT_TX_UDP_CKSUM
1049                                                */
1050                                         0x02, /* PKT_TX_IPV4 */
1051                                         0x12, /* PKT_TX_IPV4 |
1052                                                * PKT_TX_TCP_CKSUM
1053                                                */
1054                                         0x22, /* PKT_TX_IPV4 |
1055                                                * PKT_TX_SCTP_CKSUM
1056                                                */
1057                                         0x32, /* PKT_TX_IPV4 |
1058                                                * PKT_TX_UDP_CKSUM
1059                                                */
1060                                         0x03, /* PKT_TX_IPV4 |
1061                                                * PKT_TX_IP_CKSUM
1062                                                */
1063                                         0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1064                                                * PKT_TX_TCP_CKSUM
1065                                                */
1066                                         0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1067                                                * PKT_TX_SCTP_CKSUM
1068                                                */
1069                                         0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1070                                                * PKT_TX_UDP_CKSUM
1071                                                */
1072                                 },
1073
1074                                 {
1075                                         /* [16-31] = ol4type:ol3type */
1076                                         0x00, /* none */
1077                                         0x03, /* OUTER_IP_CKSUM */
1078                                         0x02, /* OUTER_IPV4 */
1079                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1080                                         0x04, /* OUTER_IPV6 */
1081                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1082                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1083                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1084                                                * OUTER_IP_CKSUM
1085                                                */
1086                                         0x00, /* OUTER_UDP_CKSUM */
1087                                         0x33, /* OUTER_UDP_CKSUM |
1088                                                * OUTER_IP_CKSUM
1089                                                */
1090                                         0x32, /* OUTER_UDP_CKSUM |
1091                                                * OUTER_IPV4
1092                                                */
1093                                         0x33, /* OUTER_UDP_CKSUM |
1094                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1095                                                */
1096                                         0x34, /* OUTER_UDP_CKSUM |
1097                                                * OUTER_IPV6
1098                                                */
1099                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1100                                                * OUTER_IP_CKSUM
1101                                                */
1102                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1103                                                * OUTER_IPV4
1104                                                */
1105                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1106                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1107                                                */
1108                                 },
1109                         }};
1110
1111                         /* Extract olflags to translate to oltype & iltype */
1112                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1113                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1114
1115                         /*
1116                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1117                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1118                          */
1119                         const uint32x4_t tshft_4 = {
1120                                 1,
1121                                 0,
1122                                 1,
1123                                 0,
1124                         };
1125                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1126                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1127
1128                         /*
1129                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1130                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1131                          */
1132                         const uint8x16_t shuf_mask5 = {
1133                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1134                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1135                         };
1136                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1137                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1138
1139                         /* Extract outer and inner header ol_flags */
1140                         const uint64x2_t oi_cksum_mask = {
1141                                 0x1CF0020000000000,
1142                                 0x1CF0020000000000,
1143                         };
1144
1145                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1146                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1147
1148                         /* Extract OUTER_UDP_CKSUM bit 41 and
1149                          * move it to bit 61
1150                          */
1151
1152                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1153                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1154
1155                         /* Shift right oltype by 2 and iltype by 4
1156                          * to start oltype nibble from BIT(58)
1157                          * instead of BIT(56) and iltype nibble from BIT(48)
1158                          * instead of BIT(52).
1159                          */
1160                         const int8x16_t tshft5 = {
1161                                 8, 8, 8, 8, 8, 8, -4, -2,
1162                                 8, 8, 8, 8, 8, 8, -4, -2,
1163                         };
1164
1165                         xtmp128 = vshlq_u8(xtmp128, tshft5);
1166                         ytmp128 = vshlq_u8(ytmp128, tshft5);
1167                         /*
1168                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1169                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1170                          */
1171                         const int8x16_t tshft3 = {
1172                                 -1, 0, -1, 0, 0, 0, 0, 0,
1173                                 -1, 0, -1, 0, 0, 0, 0, 0,
1174                         };
1175
1176                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1177                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1178
1179                         /* Mark Bit(4) of oltype */
1180                         const uint64x2_t oi_cksum_mask2 = {
1181                                 0x1000000000000000,
1182                                 0x1000000000000000,
1183                         };
1184
1185                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1186                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1187
1188                         /* Do the lookup */
1189                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1190                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only the relevant fields, i.e. bits 48:55
			 * (iltype) and bits 56:63 (oltype), and place them
			 * at the corresponding positions in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
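			/* Worked example for one packet, bytes listed from
			 * the LSB, assuming illustrative header lengths
			 * [OL2, OL3, L2, L3] = [14, 20, 8, 20]:
			 *   after a += a << 8  -> [14, 34, 28, 28]
			 *   after a += a << 16 -> [14, 34, 42, 62]
			 * i.e. OL3PTR = 14, OL4PTR = 34, IL3PTR = 42 and
			 * IL4PTR = 62; each pointer is the running sum of
			 * the header lengths preceding it.
			 */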

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
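		/* The LD1 sequence above loads a 16-bit value from each
		 * mbuf (presumably the aura handle of its pool, given the
		 * pointer setup done earlier) into 16-bit lane 0/4, i.e.
		 * the bottom of each 64-bit word. Shifting left by 20
		 * aligns it with the aura field of SEND_HDR_W0; bit 19,
		 * just below that field, is the don't-free (DF) bit used
		 * by the MBUF_NOFF path below (hence the 0x80000 there).
		 */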

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* Set the don't-free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* vsetq_lane_u64() returns the updated vector; the
			 * result must be assigned or the lane write is lost.
			 */
			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
		} else {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool objects as "put" since
			 * they are freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
		}
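		/* __mempool_check_cookies() is a no-op unless the library
		 * is built with RTE_LIBRTE_MEMPOOL_DEBUG; in debug builds
		 * it marks the object as "put" so the cookie checker does
		 * not complain when the NIX hardware frees the buffer
		 * behind the mempool's back.
		 */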

		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
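		/* vzip1q_u64(a, b) yields {a[0], b[0]} and vzip2q_u64(a, b)
		 * yields {a[1], b[1]}, so cmd0[i] becomes the
		 * {SEND_HDR_W0, SEND_HDR_W1} pair and cmd1[i] the
		 * {SG_W0, SG_W1} pair of packet i.
		 */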

		/* Store the prepared send desc to LMT lines */
		vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
		vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
		vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
		vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
		vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
		vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
		vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
		vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
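		/* Each packet takes 32B of descriptor (16B send header +
		 * 16B SG), so the four packets of this iteration exactly
		 * fill one 128B LMT line and lnum advances by one line per
		 * loop.
		 */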
		lnum += 1;

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Trigger LMTST */
	if (lnum > 16) {
		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(lnum - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (lnum) {
		data = cn10k_nix_tx_steor_vec_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(lnum - 1)) << 12;
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}
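	/* STEORL data layout, as used above: the low 3 bits (per-line
	 * size information from cn10k_nix_tx_steor_vec_data()) are folded
	 * into the I/O address, the LMT ID occupies the low bits of data
	 * and the number of lines minus one goes at bit 12. The count
	 * field appears to be 4 bits wide, which is why a burst of more
	 * than 16 lines is split across two STEORLs.
	 */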

	left -= burst;
	rte_io_wmb();
	if (left)
		goto again;

	if (unlikely(scalar))
		pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
					    flags);

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F

/* [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES                                           \
T(no_offload,                           0, 0, 0, 0, 0,  4,              \
		NIX_TX_OFFLOAD_NONE)                                    \
T(l3l4csum,                             0, 0, 0, 0, 1,  4,              \
		L3L4CSUM_F)                                             \
T(ol3ol4csum,                           0, 0, 0, 1, 0,  4,              \
		OL3OL4CSUM_F)                                           \
T(ol3ol4csum_l3l4csum,                  0, 0, 0, 1, 1,  4,              \
		OL3OL4CSUM_F | L3L4CSUM_F)                              \
T(vlan,                                 0, 0, 1, 0, 0,  6,              \
		VLAN_F)                                                 \
T(vlan_l3l4csum,                        0, 0, 1, 0, 1,  6,              \
		VLAN_F | L3L4CSUM_F)                                    \
T(vlan_ol3ol4csum,                      0, 0, 1, 1, 0,  6,              \
		VLAN_F | OL3OL4CSUM_F)                                  \
T(vlan_ol3ol4csum_l3l4csum,             0, 0, 1, 1, 1,  6,              \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(noff,                                 0, 1, 0, 0, 0,  4,              \
		NOFF_F)                                                 \
T(noff_l3l4csum,                        0, 1, 0, 0, 1,  4,              \
		NOFF_F | L3L4CSUM_F)                                    \
T(noff_ol3ol4csum,                      0, 1, 0, 1, 0,  4,              \
		NOFF_F | OL3OL4CSUM_F)                                  \
T(noff_ol3ol4csum_l3l4csum,             0, 1, 0, 1, 1,  4,              \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(noff_vlan,                            0, 1, 1, 0, 0,  6,              \
		NOFF_F | VLAN_F)                                        \
T(noff_vlan_l3l4csum,                   0, 1, 1, 0, 1,  6,              \
		NOFF_F | VLAN_F | L3L4CSUM_F)                           \
T(noff_vlan_ol3ol4csum,                 0, 1, 1, 1, 0,  6,              \
		NOFF_F | VLAN_F | OL3OL4CSUM_F)                         \
T(noff_vlan_ol3ol4csum_l3l4csum,        0, 1, 1, 1, 1,  6,              \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(tso,                                  1, 0, 0, 0, 0,  6,              \
		TSO_F)                                                  \
T(tso_l3l4csum,                         1, 0, 0, 0, 1,  6,              \
		TSO_F | L3L4CSUM_F)                                     \
T(tso_ol3ol4csum,                       1, 0, 0, 1, 0,  6,              \
		TSO_F | OL3OL4CSUM_F)                                   \
T(tso_ol3ol4csum_l3l4csum,              1, 0, 0, 1, 1,  6,              \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
T(tso_vlan,                             1, 0, 1, 0, 0,  6,              \
		TSO_F | VLAN_F)                                         \
T(tso_vlan_l3l4csum,                    1, 0, 1, 0, 1,  6,              \
		TSO_F | VLAN_F | L3L4CSUM_F)                            \
T(tso_vlan_ol3ol4csum,                  1, 0, 1, 1, 0,  6,              \
		TSO_F | VLAN_F | OL3OL4CSUM_F)                          \
T(tso_vlan_ol3ol4csum_l3l4csum,         1, 0, 1, 1, 1,  6,              \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(tso_noff,                             1, 1, 0, 0, 0,  6,              \
		TSO_F | NOFF_F)                                         \
T(tso_noff_l3l4csum,                    1, 1, 0, 0, 1,  6,              \
		TSO_F | NOFF_F | L3L4CSUM_F)                            \
T(tso_noff_ol3ol4csum,                  1, 1, 0, 1, 0,  6,              \
		TSO_F | NOFF_F | OL3OL4CSUM_F)                          \
T(tso_noff_ol3ol4csum_l3l4csum,         1, 1, 0, 1, 1,  6,              \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(tso_noff_vlan,                        1, 1, 1, 0, 0,  6,              \
		TSO_F | NOFF_F | VLAN_F)                                \
T(tso_noff_vlan_l3l4csum,               1, 1, 1, 0, 1,  6,              \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
T(tso_noff_vlan_ol3ol4csum,             1, 1, 1, 1, 0,  6,              \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,    1, 1, 1, 1, 1,  6,              \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define T(name, f4, f3, f2, f1, f0, sz, flags)                                 \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
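
/* For reference, each T() entry expands to three prototypes; e.g. the
 * (VLAN_F | L3L4CSUM_F) row above declares
 * cn10k_nix_xmit_pkts_vlan_l3l4csum(),
 * cn10k_nix_xmit_pkts_mseg_vlan_l3l4csum() and
 * cn10k_nix_xmit_pkts_vec_vlan_l3l4csum(); the matching definitions are
 * expected to be generated from the same NIX_TX_FASTPATH_MODES table
 * elsewhere in the driver.
 */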

#endif /* __CN10K_TX_H__ */