[dpdk.git] drivers/net/cnxk/cn10k_tx.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)

/* Flag to control the xmit_prepare function.
 * Defined from the top bit downwards to denote that it is not
 * used as an offload flag when picking the Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update fc_cache_pkts */                \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply by SQEs per SQB to express in pkts */      \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
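
/* Worked example of the flow-control refresh above (illustrative
 * numbers, assuming 8 SQEs per SQB, i.e. sqes_per_sqb_log2 == 3):
 * with nb_sqb_bufs_adj == 512 and *fc_mem == 500, the refreshed
 * cache is (512 - 500) << 3 == 96 packets of room, so a burst of
 * 128 packets would return 0 while a burst of 64 would proceed.
 */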

#define LMT_OFF(lmt_addr, lmt_num, offset)                                     \
        (void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
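
/* Worked example (illustrative, assuming a 128-byte LMT line, i.e.
 * ROC_LMT_LINE_SIZE_LOG2 == 7): LMT_OFF(base, 2, 8) resolves to
 * base + (2 << 7) + 8 == base + 264, the second dword of the third
 * LMT line of this core's LMT region.
 */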

/* Function to determine the number of Tx sub-descriptors required
 * when the extended sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                       ? 2
                       : ((flags &
                           (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
                                  ? 1
                                  : 0);
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        RTE_SET_USED(flags);
        /* We can pack up to 4 packets per LMTLINE if there are no offloads. */
        return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* Size fields for the remaining 15 LMT lines (single seg) */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
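
/* A sketch of the STEOR data layout built above (my reading of the
 * ROC LMTST convention; treat the field positions as assumptions):
 * each 3-bit size field encodes an LMT line's length in 16-byte
 * units minus one. With no extended header a packet is SEND_HDR
 * (16B) + SG (16B), so ext_subs == 0 and each field holds 1; with
 * the timestamp offload, ext_subs == 2 and each field holds 3
 * (64B). The first field ends up in the I/O address bits, the
 * other 15 at bits 19, 22, ..., 61 for LMT lines 1..15.
 */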

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = 0x7;
        uint64_t data;

        RTE_SET_USED(flags);
        /* This will be moved to addr area */
        data = dw_m1;
        /* Size fields for the remaining 15 LMT lines */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
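
/* In the vector path every LMT line is filled with 4 packets of
 * SEND_HDR + SG (32B each, see cn10k_nix_pkts_per_vec_brst), i.e.
 * 128B or eight 16-byte units per line, hence the fixed dw_m1 of
 * 0x7 above.
 */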

static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags)
{
        /* Send hdr */
        cmd[0] = txq->send_hdr_w0;
        cmd[1] = 0;
        cmd += 2;

        /* Send ext if present */
        if (flags & NIX_TX_NEED_EXT_HDR) {
                *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
                cmd += 2;
        }

        /* Send sg */
        cmd[0] = txq->sg_w0;
        cmd[1] = 0;
}
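
/* Resulting cmd[] layout from the skeleton above: cmd[0..1] hold
 * SEND_HDR, cmd[2..3] hold SEND_EXT when NIX_TX_NEED_EXT_HDR is
 * enabled, and the final two dwords hold the SG sub-descriptor.
 */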

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & PKT_TX_IPV6)));
                /* Handle tunnel TSO */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       PKT_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update outer UDP header length for UDP tunneled
                         * packet
                         */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & PKT_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
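
/* Worked example for the header fixup above (illustrative numbers):
 * a plain TCP/IPv4 TSO packet with l2_len 14, l3_len 20, l4_len 20
 * gives lso_sb == 54; with pkt_len == 1554 the payload to strip is
 * paylen == 1500, and iplen points at the IPv4 total-length field
 * (mdata + 14 + 2), which is reduced by 1500 so hardware can re-add
 * the per-segment payload length.
 */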

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
                       const uint16_t flags, const uint64_t lso_tun_fmt)
{
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w1.u = 0;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                send_hdr->w0.total = m->data_len;
                send_hdr->w0.aura =
                        roc_npa_aura_handle_to_aura(m->pool->pool_id);
        }

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

                /* When there is no tunnel header, shift the IL3/IL4
                 * fields into OL3/OL4 so the single header is
                 * checksummed via the outer fields.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
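
                /* How the shift works (field positions per my reading
                 * of nix_send_hdr_w1_u; treat them as assumptions):
                 * the pointer bytes live in bits 31:0 as
                 * ol3:ol4:il3:il4 and the type nibbles in bits 47:32.
                 * With no outer header, mask == 1, so the low word
                 * shifts right by 16 (il3ptr/il4ptr land in
                 * ol3ptr/ol4ptr) and the high word by 8
                 * (il3type/il4type land in ol3type/ol4type); with an
                 * outer header, mask == 0 and w1 is unchanged.
                 */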

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only one header
                 * is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment it by 1 if it is IPV4 as 3 is with csum */
                w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
                             ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
                             !!(ol_flags & PKT_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel TSO */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & PKT_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }
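
        /* A sketch of the lso_tun_fmt packing assumed by the shift
         * computation above (my reading; treat it as an assumption):
         * eight 8-bit LSO format indices packed into one 64-bit word,
         * where the byte index is is_udp_tun * 4 + outer_ipv6 * 2 +
         * inner_ipv6, matching shift = is_udp_tun ? 32 : 0, plus 16
         * for an outer IPv6 header and 8 for an inner IPv6 header.
         */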

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent
                         * mbuf is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
        }

        /* With minimal offloads, 'cmd' being local could be optimized out
         * to registers. In other cases, 'cmd' will be on the stack. The
         * intent is that 'cmd' stores the content from txq->cmd, which is
         * copied only once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;
        }
        /* In case of multi-seg, the SG template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
                struct nix_send_ext_s *send_hdr_ext =
                                (struct nix_send_ext_s *)(lmt_addr + 16);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
                send_hdr_ext->w0.tstmp = 1;
                if (flags & NIX_TX_MULTI_SEG_F) {
                        /* Retrieve the default desc values */
                        lmt[off] = cmd[2];

                        /* Use a compiler barrier to avoid violating C
                         * aliasing rules.
                         */
                        rte_compiler_barrier();
                }

                /* For packets without PKT_TX_IEEE1588_TMST, the Tx timestamp
                 * should not be recorded, hence change the alg type to
                 * NIX_SENDMEMALG_SET and move the send mem addr field ahead
                 * by 8 bytes so it does not corrupt the actual registered
                 * Tx timestamp address.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
        }
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[3 + off];

        i = 0;
        nb_segs = m->nb_segs;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= ((uint64_t)cnxk_nix_prefree_seg(m)
                                 << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Round up extra dwords to a multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
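
/* Worked example of the segdw math above (illustrative): a 3-segment
 * mbuf chain with no extended header consumes one SG dword plus three
 * pointer dwords; 4 dwords round up to two 16-byte units, and the
 * SEND_HDR adds one more, so segdw == 3 and w0.sizem1 == 2 (the
 * timestamp offload would add one further unit for SEND_MEM).
 */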

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                    uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uintptr_t pa, lmt_addr = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uint64_t lso_tun_fmt;
        uint64_t data;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at the
                 * LMT steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
                                       lso_tun_fmt);
                cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
                                              tx_pkts[i]->ol_flags, 4, flags);
                lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        left -= burst;
        rte_io_wmb();
        if (left) {
                /* Start processing another burst */
                tx_pkts += burst;
                /* Reset lmt base addr */
                lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
                lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
                goto again;
        }

        return pkts;
}
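
/* Illustrative instantiation (a sketch, not the driver's actual
 * generated burst functions, which are produced via macros elsewhere
 * in the driver): a burst function specialized at compile time for
 * inner L3/L4 checksum offload only. The cmd[] array only needs to
 * hold the descriptor skeleton for the enabled offloads.
 */
static __rte_unused uint16_t
cn10k_nix_xmit_pkts_csum_sketch(void *tx_queue, struct rte_mbuf **tx_pkts,
                                uint16_t pkts)
{
        uint64_t cmd[8];

        return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,
                                   NIX_TX_OFFLOAD_L3_L4_CSUM_F);
}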

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                         uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint64_t data0, data1;
        uint64_t lso_tun_fmt;
        __uint128_t data128;
        uint16_t shft;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        cn10k_nix_tx_skeleton(txq, cmd, flags);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;
        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at the
                 * LMT steorl will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
                                       lso_tun_fmt);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(lmt_addr, &txq->cmd[0],
                                              tx_pkts[i]->ol_flags, segdw,
                                              flags);
                lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
                data128 |= (((__uint128_t)(segdw - 1)) << shft);
                shft += 3;
        }
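
        /* data128 now holds the per-line size fields: packet i's
         * (segdw - 1) occupies 3 bits starting at bit 16 + 3 * i, so
         * the low 64 bits (after the shift-down by 16 below) cover
         * LMT lines 0..15 and the high 64 bits cover lines 16..31.
         */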

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        left -= burst;
        rte_io_wmb();
        if (left) {
                /* Start processing another burst */
                tx_pkts += burst;
                /* Reset lmt base addr */
                lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
                lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
                goto again;
        }

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
        uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
        uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
        uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
        uint64x2_t senddesc01_w0, senddesc23_w0;
        uint64x2_t senddesc01_w1, senddesc23_w1;
        uint16_t left, scalar, burst, i, lmt_id;
        uint64x2_t sgdesc01_w0, sgdesc23_w0;
        uint64x2_t sgdesc01_w1, sgdesc23_w1;
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t laddr = txq->lmt_base;
        rte_iova_t io_addr = txq->io_addr;
        uint64x2_t ltypes01, ltypes23;
        uint64x2_t xtmp128, ytmp128;
        uint64x2_t xmask01, xmask23;
        uint8_t lnum;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
        pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
        senddesc23_w0 = senddesc01_w0;
        senddesc01_w1 = vdupq_n_u64(0);
        senddesc23_w1 = senddesc01_w1;
        sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
        sgdesc23_w0 = sgdesc01_w0;

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(laddr, lmt_id);
        left = pkts;
again:
        /* Number of packets to prepare depends on offloads enabled. */
        burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
                              cn10k_nix_pkts_per_vec_brst(flags) :
                              left;
        lnum = 0;
        for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
                /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
                senddesc01_w0 =
                        vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
                sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

                senddesc23_w0 = senddesc01_w0;
                sgdesc23_w0 = sgdesc01_w0;

                /* Move mbufs to iova */
                mbuf0 = (uint64_t *)tx_pkts[0];
                mbuf1 = (uint64_t *)tx_pkts[1];
                mbuf2 = (uint64_t *)tx_pkts[2];
                mbuf3 = (uint64_t *)tx_pkts[3];

                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, buf_iova));
                /*
                 * Get the mbufs' olflags, iova, pktlen and dataoff:
                 * dataoff_iovaX.D[0] = iova,
                 * dataoff_iovaX.D[1](15:0) = mbuf->data_off
                 * len_olflagsX.D[0] = ol_flags,
                 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
                 */
                dataoff_iova0 = vld1q_u64(mbuf0);
                len_olflags0 = vld1q_u64(mbuf0 + 2);
                dataoff_iova1 = vld1q_u64(mbuf1);
                len_olflags1 = vld1q_u64(mbuf1 + 2);
                dataoff_iova2 = vld1q_u64(mbuf2);
                len_olflags2 = vld1q_u64(mbuf2 + 2);
                dataoff_iova3 = vld1q_u64(mbuf3);
                len_olflags3 = vld1q_u64(mbuf3 + 2);

                /* Move mbufs to point to pool */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));

                if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
                             NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                        /* Get tx_offload for ol2, ol3, l2, l3 lengths */
                        /*
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         */

                        asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf0 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf1 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf2 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf3 + 2)
                                     : "memory");

                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                } else {
                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                }

                const uint8x16_t shuf_mask2 = {
                        0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                        0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                };
                xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
                ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

                /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
                const uint64x2_t and_mask0 = {
                        0xFFFFFFFFFFFFFFFF,
                        0x000000000000FFFF,
                };

                dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
                dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
                dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
                dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

                /*
                 * Pick only 16 bits of pktlen present at bits 63:32
                 * and place them at bits 15:0.
                 */
                xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
                ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

                /* Add pairwise to get dataoff + iova in sgdesc_w1 */
                sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
                sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

                /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
                 * pktlen at the 15:0 position.
                 */
                sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
                sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
                senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
                senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

                /* Move mbuf to point to pool_id. */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mempool, pool_id));

                if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
                        /*
                         * Lookup table to translate ol_flags to
                         * il3/il4 types. We still use ol3/ol4 types in
                         * senddesc_w1 as only one header processing is
                         * enabled.
                         */
                        const uint8x16_t tbl = {
                                /* [0-15] = il4type:il3type */
                                0x04, /* none (IPv6 assumed) */
                                0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
                                0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
                                0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
                                0x03, /* PKT_TX_IP_CKSUM */
                                0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
                                0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
                                0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
                                0x02, /* PKT_TX_IPV4 */
                                0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
                                0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
                                0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
                                0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
                                0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                                       * PKT_TX_TCP_CKSUM
                                       */
                                0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                                       * PKT_TX_SCTP_CKSUM
                                       */
                                0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                                       * PKT_TX_UDP_CKSUM
                                       */
                        };

                        /* Extract olflags to translate to iltypes */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        /*
                         * E(47):L3_LEN(9):L2_LEN(7+z)
                         * E(47):L3_LEN(9):L2_LEN(7+z)
                         */
                        senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
                        senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

                        /* Move OLFLAGS bits 55:52 to 51:48
                         * with zeros prepended on the byte; the rest
                         * is don't care
                         */
                        xtmp128 = vshrq_n_u8(xtmp128, 4);
                        ytmp128 = vshrq_n_u8(ytmp128, 4);
                        /*
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         */
                        const int8x16_t tshft3 = {
                                -1, 0, 8, 8, 8, 8, 8, 8,
                                -1, 0, 8, 8, 8, 8, 8, 8,
                        };

                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

                        /* Do the lookup */
                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);

                        /* Pick only the relevant fields, i.e. bits 48:55 of
                         * iltype, and place them in ol3/ol4type of senddesc_w1
                         */
                        const uint8x16_t shuf_mask0 = {
                                0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
                                0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
                        };

                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
                         * a [E(32):E(16):OL3(8):OL2(8)]
                         * a = a + (a << 8)
                         * a [E(32):E(16):(OL3+OL2):OL2]
                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
                         */
                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                                 vshlq_n_u16(senddesc01_w1, 8));
                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                                 vshlq_n_u16(senddesc23_w1, 8));
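
                        /* Worked example of the a + (a << 8) trick
                         * (illustrative numbers): with l2_len 14 (0x0E)
                         * in byte 0 and l3_len 20 (0x14) in byte 1,
                         * a + (a << 8) yields byte 0 = 0x0E (ol3ptr 14)
                         * and byte 1 = 0x22 (ol4ptr 34), i.e. each byte
                         * becomes the prefix sum of the header lengths
                         * below it.
                         */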

                        /* Move ltypes to senddesc*_w1 */
                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
                } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                           (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
                        /*
                         * Lookup table to translate ol_flags to
                         * ol3/ol4 types.
                         */

                        const uint8x16_t tbl = {
                                /* [0-15] = ol4type:ol3type */
                                0x00, /* none */
                                0x03, /* OUTER_IP_CKSUM */
                                0x02, /* OUTER_IPV4 */
                                0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
                                0x04, /* OUTER_IPV6 */
                                0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
                                0x00, /* OUTER_IPV6 | OUTER_IPV4 */
                                0x00, /* OUTER_IPV6 | OUTER_IPV4 |
                                       * OUTER_IP_CKSUM
                                       */
                                0x00, /* OUTER_UDP_CKSUM */
                                0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
                                0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
                                0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
                                       * OUTER_IP_CKSUM
                                       */
                                0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                                       * OUTER_IP_CKSUM
                                       */
                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                                       * OUTER_IPV4
                                       */
                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
                                       * OUTER_IPV4 | OUTER_IP_CKSUM
                                       */
                        };

                        /* Extract olflags to translate to oltypes */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        /*
                         * E(47):OL3_LEN(9):OL2_LEN(7+z)
                         * E(47):OL3_LEN(9):OL2_LEN(7+z)
                         */
                        const uint8x16_t shuf_mask5 = {
                                0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                                0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                        };
                        senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
                        senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

                        /* Extract outer ol flags only */
                        const uint64x2_t o_cksum_mask = {
                                0x1C00020000000000,
                                0x1C00020000000000,
                        };

                        xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
                        ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

                        /* Extract OUTER_UDP_CKSUM bit 41 and
                         * move it to bit 61
                         */

                        xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
                        ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

                        /* Shift oltype by 2 to start the nibble from BIT(56)
                         * instead of BIT(58)
                         */
                        xtmp128 = vshrq_n_u8(xtmp128, 2);
                        ytmp128 = vshrq_n_u8(ytmp128, 2);
                        /*
                         * E(48):OL3_LEN(8):OL2_LEN(z+7)
                         * E(48):OL3_LEN(8):OL2_LEN(z+7)
                         */
                        const int8x16_t tshft3 = {
                                -1, 0, 8, 8, 8, 8, 8, 8,
                                -1, 0, 8, 8, 8, 8, 8, 8,
                        };

                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

                        /* Do the lookup */
                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);

                        /* Pick only the relevant fields, i.e. bits 56:63 of
                         * oltype, and place them in ol3/ol4type of senddesc_w1
                         */
                        const uint8x16_t shuf_mask0 = {
                                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
                                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
                        };

                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
                         * a [E(32):E(16):OL3(8):OL2(8)]
                         * a = a + (a << 8)
                         * a [E(32):E(16):(OL3+OL2):OL2]
                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
                         */
                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                                 vshlq_n_u16(senddesc01_w1, 8));
                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                                 vshlq_n_u16(senddesc23_w1, 8));

                        /* Move ltypes to senddesc*_w1 */
                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1078                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1079                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1080                         /* Lookup table to translate ol_flags to
1081                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1082                          */
1083                         const uint8x16x2_t tbl = {{
1084                                 {
1085                                         /* [0-15] = il4type:il3type */
1086                                         0x04, /* none (IPv6) */
1087                                         0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1088                                         0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1089                                         0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1090                                         0x03, /* PKT_TX_IP_CKSUM */
1091                                         0x13, /* PKT_TX_IP_CKSUM |
1092                                                * PKT_TX_TCP_CKSUM
1093                                                */
1094                                         0x23, /* PKT_TX_IP_CKSUM |
1095                                                * PKT_TX_SCTP_CKSUM
1096                                                */
1097                                         0x33, /* PKT_TX_IP_CKSUM |
1098                                                * PKT_TX_UDP_CKSUM
1099                                                */
1100                                         0x02, /* PKT_TX_IPV4 */
1101                                         0x12, /* PKT_TX_IPV4 |
1102                                                * PKT_TX_TCP_CKSUM
1103                                                */
1104                                         0x22, /* PKT_TX_IPV4 |
1105                                                * PKT_TX_SCTP_CKSUM
1106                                                */
1107                                         0x32, /* PKT_TX_IPV4 |
1108                                                * PKT_TX_UDP_CKSUM
1109                                                */
1110                                         0x03, /* PKT_TX_IPV4 |
1111                                                * PKT_TX_IP_CKSUM
1112                                                */
1113                                         0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1114                                                * PKT_TX_TCP_CKSUM
1115                                                */
1116                                         0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1117                                                * PKT_TX_SCTP_CKSUM
1118                                                */
1119                                         0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1120                                                * PKT_TX_UDP_CKSUM
1121                                                */
1122                                 },
1123
1124                                 {
1125                                         /* [16-31] = ol4type:ol3type */
1126                                         0x00, /* none */
1127                                         0x03, /* OUTER_IP_CKSUM */
1128                                         0x02, /* OUTER_IPV4 */
1129                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1130                                         0x04, /* OUTER_IPV6 */
1131                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1132                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1133                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1134                                                * OUTER_IP_CKSUM
1135                                                */
1136                                         0x00, /* OUTER_UDP_CKSUM */
1137                                         0x33, /* OUTER_UDP_CKSUM |
1138                                                * OUTER_IP_CKSUM
1139                                                */
1140                                         0x32, /* OUTER_UDP_CKSUM |
1141                                                * OUTER_IPV4
1142                                                */
1143                                         0x33, /* OUTER_UDP_CKSUM |
1144                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1145                                                */
1146                                         0x34, /* OUTER_UDP_CKSUM |
1147                                                * OUTER_IPV6
1148                                                */
1149                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1150                                                * OUTER_IP_CKSUM
1151                                                */
1152                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1153                                                * OUTER_IPV4
1154                                                */
1155                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1156                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1157                                                */
1158                                 },
1159                         }};
1160
1161                         /* Extract olflags to translate to oltype & iltype */
1162                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1163                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1164
1165                         /*
1166                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1167                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1168                          */
1169                         const uint32x4_t tshft_4 = {
1170                                 1,
1171                                 0,
1172                                 1,
1173                                 0,
1174                         };
1175                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1176                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

                        /*
                         * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
                         * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
                         */
                        const uint8x16_t shuf_mask5 = {
                                0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
                                0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
                        };
                        senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
                        senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

                        /* Extract outer and inner header ol_flags */
                        const uint64x2_t oi_cksum_mask = {
                                0x1CF0020000000000,
                                0x1CF0020000000000,
                        };

                        xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
                        ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

                        /* Extract OUTER_UDP_CKSUM bit 41 and
                         * move it to bit 61
                         */
                        xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
                        ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
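                        /* e.g. an ol_flags word with only OUTER_UDP_CKSUM
                         * (1ULL << 41) set becomes (1ULL << 61) | (1ULL << 41)
                         * after OR-ing in the << 20 copy; only the copy at
                         * bit 61 lands inside the oltype nibble used below,
                         * the stale bit 41 is discarded later.
                         */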

                        /* Shift right oltype by 2 and iltype by 4 so that
                         * the oltype nibble starts at BIT(56) instead of
                         * BIT(58) and the iltype nibble at BIT(48) instead
                         * of BIT(52).
                         */
                        const int8x16_t tshft5 = {
                                8, 8, 8, 8, 8, 8, -4, -2,
                                8, 8, 8, 8, 8, 8, -4, -2,
                        };

                        xtmp128 = vshlq_u8(xtmp128, tshft5);
                        ytmp128 = vshlq_u8(ytmp128, tshft5);
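                        /* vshlq_u8() takes per-lane signed shift counts: a
                         * left shift by 8 clears a byte lane outright, so
                         * bytes 0-5 (including the stale copy of bit 41) are
                         * zeroed while the negative counts shift the iltype
                         * (byte 6) and oltype (byte 7) nibbles right into
                         * their final positions.
                         */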
                        /*
                         * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
                         * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
                         */
                        const int8x16_t tshft3 = {
                                -1, 0, -1, 0, 0, 0, 0, 0,
                                -1, 0, -1, 0, 0, 0, 0, 0,
                        };

                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

                        /* Mark Bit(4) of oltype */
                        const uint64x2_t oi_cksum_mask2 = {
                                0x1000000000000000,
                                0x1000000000000000,
                        };

                        xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
                        ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

                        /* Do the lookup */
                        ltypes01 = vqtbl2q_u8(tbl, xtmp128);
                        ltypes23 = vqtbl2q_u8(tbl, ytmp128);
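                        /* vqtbl2q_u8() is a 32-byte table lookup: every index
                         * byte selects one byte from the two-register tbl and
                         * out-of-range indices yield 0. Marking bit 4 of the
                         * oltype nibble above is what steers the outer type
                         * into the second 16-byte half of the table, while
                         * the iltype nibble indexes the first half.
                         */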

                        /* Pick only the relevant fields, i.e. bits 48:55
                         * (iltype) and bits 56:63 (oltype) of the lookup
                         * result, and place them at their positions in
                         * senddesc_w1.
                         */
                        const uint8x16_t shuf_mask0 = {
                                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
                                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
                        };

                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

                        /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
                         * l3len, l2len, ol3len, ol2len.
                         * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
                         * a = a + (a << 8)
                         * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
                         * a = a + (a << 16)
                         * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
                         * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
                         */
                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                                 vshlq_n_u32(senddesc01_w1, 8));
                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                                 vshlq_n_u32(senddesc23_w1, 8));

                        /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
                        senddesc01_w1 = vaddq_u8(
                                senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
                        senddesc23_w1 = vaddq_u8(
                                senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
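                        /* Worked example of the two-step prefix sum with
                         * illustrative lengths OL2=14, OL3=20, L2=14, L3=20
                         * (bytes listed LSB first):
                         *   start:        [14, 20, 14, 20]
                         *   a += a << 8:  [14, 34, 34, 34]
                         *   a += a << 16: [14, 34, 48, 68]
                         * i.e. OL3PTR=14, OL4PTR=34, IL3PTR=48, IL4PTR=68;
                         * each header pointer is the sum of the lengths
                         * preceding it.
                         */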

                        /* Move ltypes to senddesc*_w1 */
                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
                }

                xmask01 = vdupq_n_u64(0);
                xmask23 = xmask01;
                asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
                             : [a] "+w"(xmask01)
                             : [in] "r"(mbuf0)
                             : "memory");

                asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
                             : [a] "+w"(xmask01)
                             : [in] "r"(mbuf1)
                             : "memory");

                asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
                             : [b] "+w"(xmask23)
                             : [in] "r"(mbuf2)
                             : "memory");

                asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
                             : [b] "+w"(xmask23)
                             : [in] "r"(mbuf3)
                             : "memory");
                xmask01 = vshlq_n_u64(xmask01, 20);
                xmask23 = vshlq_n_u64(xmask23, 20);

                senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
                senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
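                /* The LD1 {..}[lane] forms above gather one 16-bit field per
                 * mbuf straight into a vector lane (mbuf0-3 were pointed at
                 * the relevant field earlier in the loop), avoiding four
                 * scalar load-and-insert round trips; the << 20 lines the
                 * gathered value up with the aura field of send header W0.
                 */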

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* Set don't free bit if reference count > 1 */
                        xmask01 = vdupq_n_u64(0);
                        xmask23 = xmask01;
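                        /* 0x80000 below is BIT(19) of send header W0, i.e.
                         * the DF (don't-free) bit: a segment that is still
                         * referenced elsewhere must be transmitted without
                         * being released back to its pool.
                         */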

                        /* Move mbuf pointers back from the iova field to
                         * the mbuf base.
                         */
                        mbuf0 = (uint64_t *)tx_pkts[0];
                        mbuf1 = (uint64_t *)tx_pkts[1];
                        mbuf2 = (uint64_t *)tx_pkts[2];
                        mbuf3 = (uint64_t *)tx_pkts[3];

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
                                xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
                        else
                                __mempool_check_cookies(
                                        ((struct rte_mbuf *)mbuf0)->pool,
                                        (void **)&mbuf0, 1, 0);

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
                                xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
                        else
                                __mempool_check_cookies(
                                        ((struct rte_mbuf *)mbuf1)->pool,
                                        (void **)&mbuf1, 1, 0);

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
                                xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
                        else
                                __mempool_check_cookies(
                                        ((struct rte_mbuf *)mbuf2)->pool,
                                        (void **)&mbuf2, 1, 0);

                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
                                xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
                        else
                                __mempool_check_cookies(
                                        ((struct rte_mbuf *)mbuf3)->pool,
                                        (void **)&mbuf3, 1, 0);
                        senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
                        senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
                } else {
                        /* Move mbuf pointers back from the iova field to
                         * the mbuf base.
                         */
                        mbuf0 = (uint64_t *)tx_pkts[0];
                        mbuf1 = (uint64_t *)tx_pkts[1];
                        mbuf2 = (uint64_t *)tx_pkts[2];
                        mbuf3 = (uint64_t *)tx_pkts[3];

                        /* Mark mempool objects as "put" since
                         * they are freed by NIX
                         */
                        __mempool_check_cookies(
                                ((struct rte_mbuf *)mbuf0)->pool,
                                (void **)&mbuf0, 1, 0);

                        __mempool_check_cookies(
                                ((struct rte_mbuf *)mbuf1)->pool,
                                (void **)&mbuf1, 1, 0);

                        __mempool_check_cookies(
                                ((struct rte_mbuf *)mbuf2)->pool,
                                (void **)&mbuf2, 1, 0);

                        __mempool_check_cookies(
                                ((struct rte_mbuf *)mbuf3)->pool,
                                (void **)&mbuf3, 1, 0);
                }

                /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
                cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
                cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
                cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
                cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

                cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
                cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
                cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
                cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

                /* Store the prepared send desc to LMT lines */
                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
                vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
                vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
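                /* An LMT line is 128 bytes here, so the four (send header,
                 * SG) descriptor pairs fill exactly one line: cmd0[i] at
                 * byte 32 * i with cmd1[i] right behind it at 32 * i + 16.
                 */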
                lnum += 1;

                tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
        }

        /* Trigger LMTST */
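        /* A single STEORL submits at most 16 LMT lines, so a burst that used
         * more than 16 lines is flushed in two goes: lines 0-15 first
         * (count - 1 = 15), then the remaining lnum - 16 lines starting at
         * lmt_id + 16.
         */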
        if (lnum > 16) {
                data = cn10k_nix_tx_steor_vec_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_vec_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(lnum - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (lnum) {
                data = cn10k_nix_tx_steor_vec_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(lnum - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        left -= burst;
        rte_io_wmb();
        if (left)
                goto again;

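        /* Send any tail that did not fill a full vector group through the
         * scalar path.
         */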
        if (unlikely(scalar))
                pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
                                            flags);

        return pkts;
}

#else
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        RTE_SET_USED(tx_queue);
        RTE_SET_USED(tx_pkts);
        RTE_SET_USED(pkts);
        RTE_SET_USED(cmd);
        RTE_SET_USED(flags);
        return 0;
}
#endif

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F        NIX_TX_OFFLOAD_TSO_F
#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F

/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
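/* Each T() entry gives the function name suffix, the six flag bits in the
 * bracket order above, the command buffer size (in 64-bit words) the mode
 * needs, and the combined offload flag mask.
 */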
#define NIX_TX_FASTPATH_MODES                                           \
T(no_offload,                           0, 0, 0, 0, 0, 0,       4,      \
                NIX_TX_OFFLOAD_NONE)                                    \
T(l3l4csum,                             0, 0, 0, 0, 0, 1,       4,      \
                L3L4CSUM_F)                                             \
T(ol3ol4csum,                           0, 0, 0, 0, 1, 0,       4,      \
                OL3OL4CSUM_F)                                           \
T(ol3ol4csum_l3l4csum,                  0, 0, 0, 0, 1, 1,       4,      \
                OL3OL4CSUM_F | L3L4CSUM_F)                              \
T(vlan,                                 0, 0, 0, 1, 0, 0,       6,      \
                VLAN_F)                                                 \
T(vlan_l3l4csum,                        0, 0, 0, 1, 0, 1,       6,      \
                VLAN_F | L3L4CSUM_F)                                    \
T(vlan_ol3ol4csum,                      0, 0, 0, 1, 1, 0,       6,      \
                VLAN_F | OL3OL4CSUM_F)                                  \
T(vlan_ol3ol4csum_l3l4csum,             0, 0, 0, 1, 1, 1,       6,      \
                VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(noff,                                 0, 0, 1, 0, 0, 0,       4,      \
                NOFF_F)                                                 \
T(noff_l3l4csum,                        0, 0, 1, 0, 0, 1,       4,      \
                NOFF_F | L3L4CSUM_F)                                    \
T(noff_ol3ol4csum,                      0, 0, 1, 0, 1, 0,       4,      \
                NOFF_F | OL3OL4CSUM_F)                                  \
T(noff_ol3ol4csum_l3l4csum,             0, 0, 1, 0, 1, 1,       4,      \
                NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                     \
T(noff_vlan,                            0, 0, 1, 1, 0, 0,       6,      \
                NOFF_F | VLAN_F)                                        \
T(noff_vlan_l3l4csum,                   0, 0, 1, 1, 0, 1,       6,      \
                NOFF_F | VLAN_F | L3L4CSUM_F)                           \
T(noff_vlan_ol3ol4csum,                 0, 0, 1, 1, 1, 0,       6,      \
                NOFF_F | VLAN_F | OL3OL4CSUM_F)                         \
T(noff_vlan_ol3ol4csum_l3l4csum,        0, 0, 1, 1, 1, 1,       6,      \
                NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)            \
T(tso,                                  0, 1, 0, 0, 0, 0,       6,      \
                TSO_F)                                                  \
T(tso_l3l4csum,                         0, 1, 0, 0, 0, 1,       6,      \
                TSO_F | L3L4CSUM_F)                                     \
T(tso_ol3ol4csum,                       0, 1, 0, 0, 1, 0,       6,      \
                TSO_F | OL3OL4CSUM_F)                                   \
T(tso_ol3ol4csum_l3l4csum,              0, 1, 0, 0, 1, 1,       6,      \
                TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
T(tso_vlan,                             0, 1, 0, 1, 0, 0,       6,      \
                TSO_F | VLAN_F)                                         \
T(tso_vlan_l3l4csum,                    0, 1, 0, 1, 0, 1,       6,      \
                TSO_F | VLAN_F | L3L4CSUM_F)                            \
T(tso_vlan_ol3ol4csum,                  0, 1, 0, 1, 1, 0,       6,      \
                TSO_F | VLAN_F | OL3OL4CSUM_F)                          \
T(tso_vlan_ol3ol4csum_l3l4csum,         0, 1, 0, 1, 1, 1,       6,      \
                TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(tso_noff,                             0, 1, 1, 0, 0, 0,       6,      \
                TSO_F | NOFF_F)                                         \
T(tso_noff_l3l4csum,                    0, 1, 1, 0, 0, 1,       6,      \
                TSO_F | NOFF_F | L3L4CSUM_F)                            \
T(tso_noff_ol3ol4csum,                  0, 1, 1, 0, 1, 0,       6,      \
                TSO_F | NOFF_F | OL3OL4CSUM_F)                          \
T(tso_noff_ol3ol4csum_l3l4csum,         0, 1, 1, 0, 1, 1,       6,      \
                TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(tso_noff_vlan,                        0, 1, 1, 1, 0, 0,       6,      \
                TSO_F | NOFF_F | VLAN_F)                                \
T(tso_noff_vlan_l3l4csum,               0, 1, 1, 1, 0, 1,       6,      \
                TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
T(tso_noff_vlan_ol3ol4csum,             0, 1, 1, 1, 1, 0,       6,      \
                TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,    0, 1, 1, 1, 1, 1,       6,      \
                TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
T(ts,                                   1, 0, 0, 0, 0, 0,       8,      \
                TSP_F)                                                  \
T(ts_l3l4csum,                          1, 0, 0, 0, 0, 1,       8,      \
                TSP_F | L3L4CSUM_F)                                     \
T(ts_ol3ol4csum,                        1, 0, 0, 0, 1, 0,       8,      \
                TSP_F | OL3OL4CSUM_F)                                   \
T(ts_ol3ol4csum_l3l4csum,               1, 0, 0, 0, 1, 1,       8,      \
                TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                      \
T(ts_vlan,                              1, 0, 0, 1, 0, 0,       8,      \
                TSP_F | VLAN_F)                                         \
T(ts_vlan_l3l4csum,                     1, 0, 0, 1, 0, 1,       8,      \
                TSP_F | VLAN_F | L3L4CSUM_F)                            \
T(ts_vlan_ol3ol4csum,                   1, 0, 0, 1, 1, 0,       8,      \
                TSP_F | VLAN_F | OL3OL4CSUM_F)                          \
T(ts_vlan_ol3ol4csum_l3l4csum,          1, 0, 0, 1, 1, 1,       8,      \
                TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(ts_noff,                              1, 0, 1, 0, 0, 0,       8,      \
                TSP_F | NOFF_F)                                         \
T(ts_noff_l3l4csum,                     1, 0, 1, 0, 0, 1,       8,      \
                TSP_F | NOFF_F | L3L4CSUM_F)                            \
T(ts_noff_ol3ol4csum,                   1, 0, 1, 0, 1, 0,       8,      \
                TSP_F | NOFF_F | OL3OL4CSUM_F)                          \
T(ts_noff_ol3ol4csum_l3l4csum,          1, 0, 1, 0, 1, 1,       8,      \
                TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)             \
T(ts_noff_vlan,                         1, 0, 1, 1, 0, 0,       8,      \
                TSP_F | NOFF_F | VLAN_F)                                \
T(ts_noff_vlan_l3l4csum,                1, 0, 1, 1, 0, 1,       8,      \
                TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                   \
T(ts_noff_vlan_ol3ol4csum,              1, 0, 1, 1, 1, 0,       8,      \
                TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                 \
T(ts_noff_vlan_ol3ol4csum_l3l4csum,     1, 0, 1, 1, 1, 1,       8,      \
                TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
T(ts_tso,                               1, 1, 0, 0, 0, 0,       8,      \
                TSP_F | TSO_F)                                          \
T(ts_tso_l3l4csum,                      1, 1, 0, 0, 0, 1,       8,      \
                TSP_F | TSO_F | L3L4CSUM_F)                             \
T(ts_tso_ol3ol4csum,                    1, 1, 0, 0, 1, 0,       8,      \
                TSP_F | TSO_F | OL3OL4CSUM_F)                           \
T(ts_tso_ol3ol4csum_l3l4csum,           1, 1, 0, 0, 1, 1,       8,      \
                TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)              \
T(ts_tso_vlan,                          1, 1, 0, 1, 0, 0,       8,      \
                TSP_F | TSO_F | VLAN_F)                                 \
T(ts_tso_vlan_l3l4csum,                 1, 1, 0, 1, 0, 1,       8,      \
                TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                    \
T(ts_tso_vlan_ol3ol4csum,               1, 1, 0, 1, 1, 0,       8,      \
                TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                  \
T(ts_tso_vlan_ol3ol4csum_l3l4csum,      1, 1, 0, 1, 1, 1,       8,      \
                TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
T(ts_tso_noff,                          1, 1, 1, 0, 0, 0,       8,      \
                TSP_F | TSO_F | NOFF_F)                                 \
T(ts_tso_noff_l3l4csum,                 1, 1, 1, 0, 0, 1,       8,      \
                TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                    \
T(ts_tso_noff_ol3ol4csum,               1, 1, 1, 0, 1, 0,       8,      \
                TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                  \
T(ts_tso_noff_ol3ol4csum_l3l4csum,      1, 1, 1, 0, 1, 1,       8,      \
                TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
T(ts_tso_noff_vlan,                     1, 1, 1, 1, 0, 0,       8,      \
                TSP_F | TSO_F | NOFF_F | VLAN_F)                        \
T(ts_tso_noff_vlan_l3l4csum,            1, 1, 1, 1, 0, 1,       8,      \
                TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)           \
T(ts_tso_noff_vlan_ol3ol4csum,          1, 1, 1, 1, 1, 0,       8,      \
                TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)         \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1,       8,      \
                TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
                                                                               \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
                                                                               \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
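
/* Usage sketch (illustrative only): a compilation unit provides the bodies
 * for the functions declared above by redefining T() before expanding the
 * mode list, along the lines of:
 *
 *   #define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                    \
 *   uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(         \
 *           void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)     \
 *   {                                                                     \
 *           uint64_t cmd[sz];                                             \
 *                                                                         \
 *           return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,      \
 *                                      flags);                            \
 *   }
 *
 *   NIX_TX_FASTPATH_MODES
 *   #undef T
 */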

#endif /* __CN10K_TX_H__ */