drivers/net/cnxk/cn9k_tx.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN9K_TX_H__
#define __CN9K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)
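
/*
 * Illustrative note (not part of the original header): these offload flags
 * are OR'd together at compile time to select a specialized Tx function
 * variant. A hypothetical instantiation for inner/outer checksum plus VLAN
 * insertion could look like:
 *
 *     const uint16_t flags = NIX_TX_OFFLOAD_L3_L4_CSUM_F |
 *                            NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
 *                            NIX_TX_OFFLOAD_VLAN_QINQ_F;
 *     sent = cn9k_nix_xmit_pkts(tx_queue, tx_pkts, nb_pkts, cmd, flags);
 */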

/* Flags to control the xmit_prepare function.
 * Defined from the MSB end to denote that these are
 * not used as offload flags to pick the Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update fc_cache_pkts */                \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply by sqes_per_sqb to express in pkts */      \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
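
/*
 * Illustrative usage (a sketch, not from the original source): the burst
 * Tx routines below run this flow-control check before touching the SQ,
 * returning 0 to the caller when there is no room, e.g.:
 *
 *     uint16_t my_burst_send(void *tx_queue, struct rte_mbuf **pkts, uint16_t n)
 *     {
 *             struct cn9k_eth_txq *txq = tx_queue;
 *
 *             NIX_XMIT_FC_OR_RETURN(txq, n); // 0 packets sent when no room
 *             // ... prepare and submit n packets ...
 *             return n;
 *     }
 */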

/* Function to determine the number of Tx sub-descriptors
 * required when the extended sub-descriptor is enabled.
 */
static __rte_always_inline int
cn9k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                       ? 2
                       : ((flags &
                           (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
                                  ? 1
                                  : 0);
}
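
/*
 * Illustrative evaluations (derived directly from the function above):
 *     cn9k_nix_tx_ext_subs(NIX_TX_OFFLOAD_TSO_F) == 1
 *     cn9k_nix_tx_ext_subs(NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F) == 2
 *     cn9k_nix_tx_ext_subs(NIX_TX_OFFLOAD_L3_L4_CSUM_F) == 0
 */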

static __rte_always_inline void
cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Payload length excluding the base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update outer UDP header length for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
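
/*
 * Worked example (illustrative, assuming plain TCPv4 over Ethernet with no
 * tunnel): for l2_len = 14, l3_len = 20, l4_len = 20 and pkt_len = 1514,
 * lso_sb = 54 and paylen = 1460, so the IPv4 total-length field (found at
 * mdata + 14 + 2) is reduced from 1500 to 40, leaving a per-segment header
 * template for the hardware to replicate.
 */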

static __rte_always_inline void
cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                      const uint64_t lso_tun_fmt)
{
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w1.u = 0;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                send_hdr->w0.total = m->data_len;
                send_hdr->w0.aura =
                        roc_npa_aura_handle_to_aura(m->pool->pool_id);
        }

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO.
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment by 1 if it is IPv4, as type 3 is IPv4 with csum */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* When there is no tunnel header, shift the
                 * IL3/IL4 fields into OL3/OL4 so the single
                 * header's checksum uses OL3/OL4.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment by 1 if it is IPv4, as type 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present.
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment by 1 if it is IPv4, as type 3 is IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                        /* Ensure mbuf fields updated in
                         * cnxk_nix_prefree_seg are written before LMTST.
                         */
                        rte_io_wmb();
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        }
}
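
/*
 * Layout note (illustrative, derived from the code above): the cmd[] words
 * populated by cn9k_nix_xmit_prepare() are
 *
 *     cmd[0..1]              SEND_HDR
 *     cmd[2..3]              SEND_EXT (only when NIX_TX_NEED_EXT_HDR is set)
 *     cmd[2..3] / cmd[4..5]  SEND_SG followed by the first IOVA
 *
 * with a SEND_MEM sub-descriptor appended later when the TSTAMP offload is
 * enabled (see cn9k_nix_xmit_prepare_tstamp()).
 */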

static __rte_always_inline void
cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
                             const uint64_t ol_flags, const uint16_t no_segdw,
                             const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                struct nix_send_mem_s *send_mem;
                uint16_t off = (no_segdw - 1) << 1;
                const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);

                send_mem = (struct nix_send_mem_s *)(cmd + off);
                if (flags & NIX_TX_MULTI_SEG_F) {
                        /* Retrieve the default desc value */
                        cmd[off] = send_mem_desc[6];

                        /* Use a compiler barrier to avoid violation of C
                         * aliasing rules.
                         */
                        rte_compiler_barrier();
                }

                /* For packets without RTE_MBUF_F_TX_IEEE1588_TMST, the Tx
                 * tstamp must not be recorded; hence change the alg type to
                 * NIX_SENDMEMALG_SET and also move the send mem addr field
                 * to the next 8 bytes so the actual registered Tx tstamp
                 * address is not corrupted.
                 */
                send_mem->w0.cn9k.alg =
                        NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);

                send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
                                              (is_ol_tstamp));
        }
}
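
/*
 * Note (an inference from the comment above, not stated elsewhere in this
 * header): subtracting is_ol_tstamp assumes NIX_SENDMEMALG_SET ==
 * NIX_SENDMEMALG_SETTSTMP - 1, and adding is_ol_tstamp to a uint64_t
 * pointer bumps the destination by 8 bytes, so non-PTP packets write to a
 * scratch word instead of the registered timestamp address.
 */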

static __rte_always_inline void
cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
                  const uint32_t flags)
{
        uint64_t lmt_status;

        do {
                roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
{
        roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
{
        return roc_lmt_submit_ldeor(io_addr);
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
{
        return roc_lmt_submit_ldeorl(io_addr);
}
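
/*
 * Illustrative pairing (a sketch, not from the original source): the split
 * prep/submit helpers let a caller interleave other work and retry on LMTST
 * failure, mirroring what cn9k_nix_xmit_one() does internally:
 *
 *     cn9k_nix_xmit_prep_lmt(cmd, lmt_addr, flags);
 *     while (!cn9k_nix_xmit_submit_lmt(io_addr))
 *             cn9k_nix_xmit_prep_lmt(cmd, lmt_addr, flags);
 */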

static __rte_always_inline uint16_t
cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[3 + off];

        i = 0;
        nb_segs = m->nb_segs;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                        /* Commit changes to mbuf */
                        rte_io_wmb();
                }
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Round up extra dwords to a multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
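
/*
 * Worked example (illustrative): for a 3-segment mbuf chain with no
 * extended header and no timestamp (off = 0), the SG word lands in cmd[2]
 * and the three IOVAs fill cmd[3..5], so slist ends at &cmd[6]:
 * segdw = 4 -> rounded up to 2 (16B dwords) -> plus 1 default dword = 3,
 * hence sizem1 = 2.
 */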

static __rte_always_inline void
cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
{
        roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
                       uint16_t segdw)
{
        uint64_t lmt_status;

        do {
                roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
                               rte_iova_t io_addr, uint16_t segdw)
{
        uint64_t lmt_status;

        rte_io_wmb();
        do {
                roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
}

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                   uint64_t *cmd, const uint16_t flags)
{
        struct cn9k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        void *lmt_addr = txq->lmt_addr;
        uint64_t lso_tun_fmt;
        uint16_t i;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));

        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
                lso_tun_fmt = txq->lso_tun_fmt;

                for (i = 0; i < pkts; i++)
                        cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }

        /* Commit any changes to the packets here, as no further changes
         * will be made unless the no-fast-free offload is enabled.
         */
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();

        for (i = 0; i < pkts; i++) {
                cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
                cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
                                             tx_pkts[i]->ol_flags, 4, flags);
                cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
        }

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        return pkts;
}

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                        uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn9k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        void *lmt_addr = txq->lmt_addr;
        uint64_t lso_tun_fmt;
        uint16_t segdw;
        uint64_t i;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));

        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
                lso_tun_fmt = txq->lso_tun_fmt;

                for (i = 0; i < pkts; i++)
                        cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }

        /* Commit any changes to the packets here, as no further changes
         * will be made unless the no-fast-free offload is enabled.
         */
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();

        for (i = 0; i < pkts; i++) {
                cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
                segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
                cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
                                             tx_pkts[i]->ol_flags, segdw,
                                             flags);
                cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
        }

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                     union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                     uint64_t flags)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                        0x1;

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format += is_udp_tun ? 2 : 6;

                w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
        }
}

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                               union nix_send_hdr_w0_u *sh,
                               union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        uint64_t segdw;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                        /* Mark mempool object as "put" since it is freed by NIX
                         */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];

        segdw += 2;
        /* Round up extra dwords to a multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
                 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        sh->sizem1 = segdw - 1;

        return segdw;
}

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                          uint64x2_t *cmd1, const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;
        uint8_t ret;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
                       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
        return ret;
}

#define NIX_DESCS_PER_LOOP 4
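
/*
 * Sizing note (an assumption, not stated in this header): a CN9K LMTLINE
 * holds 16 64-bit words, i.e. 8 NIX dwords, which is why the multi-seg
 * vector path below packs two (or all four) packets into one LMTST only
 * when their combined dword counts fit within 8.
 */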

static __rte_always_inline void
cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
                               uint64x2_t *cmd2, uint64x2_t *cmd3,
                               uint8_t *segdw,
                               uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
                               uint64_t *lmt_addr, rte_iova_t io_addr,
                               const uint32_t flags)
{
        uint64_t lmt_status;
        uint8_t j, off;

        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                /* No extra segments in the 4 consecutive packets:
                 * all of them fit in a single LMTLINE.
                 */
                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
                        do {
                                vst1q_u64(lmt_addr, cmd0[0]);
                                vst1q_u64(lmt_addr + 2, cmd1[0]);
                                vst1q_u64(lmt_addr + 4, cmd0[1]);
                                vst1q_u64(lmt_addr + 6, cmd1[1]);
                                vst1q_u64(lmt_addr + 8, cmd0[2]);
                                vst1q_u64(lmt_addr + 10, cmd1[2]);
                                vst1q_u64(lmt_addr + 12, cmd0[3]);
                                vst1q_u64(lmt_addr + 14, cmd1[3]);
                                lmt_status = roc_lmt_submit_ldeor(io_addr);
                        } while (lmt_status == 0);

                        return;
                }
        }

        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
                /* Fit consecutive packets in same LMTLINE. */
                if ((segdw[j] + segdw[j + 1]) <= 8) {
again0:
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 4;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);

                                vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
                                roc_lmt_mov_seg(lmt_addr + 14 + off,
                                                slist[j + 1], segdw[j + 1] - 4);
                                off += ((segdw[j + 1] - 4) << 1);
                                vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 3;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
                                roc_lmt_mov_seg(lmt_addr + 12 + off,
                                                slist[j + 1], segdw[j + 1] - 3);
                        } else {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 2;
                                roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
                                roc_lmt_mov_seg(lmt_addr + 8 + off,
                                                slist[j + 1], segdw[j + 1] - 2);
                        }
                        lmt_status = roc_lmt_submit_ldeor(io_addr);
                        if (lmt_status == 0)
                                goto again0;
                        j += 2;
                } else {
again1:
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 4;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 3;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                        } else {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 2;
                                roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
                        }
                        lmt_status = roc_lmt_submit_ldeor(io_addr);
                        if (lmt_status == 0)
                                goto again1;
                        j += 1;
                }
        }
}

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                          uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
        uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
        uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
                cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
        uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
        uint64x2_t senddesc01_w0, senddesc23_w0;
        uint64x2_t senddesc01_w1, senddesc23_w1;
        uint64x2_t sendext01_w0, sendext23_w0;
        uint64x2_t sendext01_w1, sendext23_w1;
        uint64x2_t sendmem01_w0, sendmem23_w0;
        uint64x2_t sendmem01_w1, sendmem23_w1;
        uint64x2_t sgdesc01_w0, sgdesc23_w0;
        uint64x2_t sgdesc01_w1, sgdesc23_w1;
        struct cn9k_eth_txq *txq = tx_queue;
        uint64_t *lmt_addr = txq->lmt_addr;
        rte_iova_t io_addr = txq->io_addr;
        uint64x2_t ltypes01, ltypes23;
        uint64x2_t xtmp128, ytmp128;
        uint64x2_t xmask01, xmask23;
        uint64_t lmt_status, i;
        uint16_t pkts_left;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
        pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
                for (i = 0; i < pkts; i++)
                        cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }

        /* Commit any changes to the packets here, as no further changes
         * will be made unless the no-fast-free offload is enabled.
         */
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();

        senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
        senddesc23_w0 = senddesc01_w0;
        senddesc01_w1 = vdupq_n_u64(0);
        senddesc23_w1 = senddesc01_w1;

        /* Load command defaults into vector variables. */
        if (flags & NIX_TX_NEED_EXT_HDR) {
                sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
                sendext23_w0 = sendext01_w0;
                sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
                sendext23_w1 = sendext01_w1;
                sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
                sgdesc23_w0 = sgdesc01_w0;
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                        sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
                        sendmem23_w0 = sendmem01_w0;
                        sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
                        sendmem23_w1 = sendmem01_w1;
                }
        } else {
                sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
                sgdesc23_w0 = sgdesc01_w0;
        }

        for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
                /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
                senddesc01_w0 =
                        vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
                sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

                senddesc23_w0 = senddesc01_w0;
                sgdesc23_w0 = sgdesc01_w0;

                /* Clear vlan enables. */
                if (flags & NIX_TX_NEED_EXT_HDR) {
                        sendext01_w1 = vbicq_u64(sendext01_w1,
                                                 vdupq_n_u64(0x3FFFF00FFFF00));
                        sendext23_w1 = sendext01_w1;
                }

                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                        /* Reset send mem alg to SETTSTMP from SUB */
                        sendmem01_w0 = vbicq_u64(sendmem01_w0,
                                                 vdupq_n_u64(BIT_ULL(59)));
                        /* Reset send mem address to default. */
                        sendmem01_w1 =
                                vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
                        sendmem23_w0 = sendmem01_w0;
                        sendmem23_w1 = sendmem01_w1;
                }

                if (flags & NIX_TX_OFFLOAD_TSO_F) {
                        /* Clear the LSO enable bit. */
                        sendext01_w0 = vbicq_u64(sendext01_w0,
                                                 vdupq_n_u64(BIT_ULL(14)));
                        sendext23_w0 = sendext01_w0;
                }

                /* Move mbuf pointers to the buf_iova field */
                mbuf0 = (uint64_t *)tx_pkts[0];
                mbuf1 = (uint64_t *)tx_pkts[1];
                mbuf2 = (uint64_t *)tx_pkts[2];
                mbuf3 = (uint64_t *)tx_pkts[3];

                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, buf_iova));
                /*
                 * Get each mbuf's ol_flags, iova, pkt_len, data_off:
                 * dataoff_iovaX.D[0] = iova,
                 * dataoff_iovaX.D[1](15:0) = mbuf->data_off
                 * len_olflagsX.D[0] = ol_flags,
                 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
                 */
                dataoff_iova0 = vld1q_u64(mbuf0);
                len_olflags0 = vld1q_u64(mbuf0 + 2);
                dataoff_iova1 = vld1q_u64(mbuf1);
                len_olflags1 = vld1q_u64(mbuf1 + 2);
                dataoff_iova2 = vld1q_u64(mbuf2);
                len_olflags2 = vld1q_u64(mbuf2 + 2);
                dataoff_iova3 = vld1q_u64(mbuf3);
                len_olflags3 = vld1q_u64(mbuf3 + 2);

                /* Move mbuf pointers to the pool field */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));

                if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
                             NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                        /* Get tx_offload for ol2, ol3, l2, l3 lengths */
                        /*
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         */

                        asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf0 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf1 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf2 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf3 + 2)
                                     : "memory");

                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                } else {
                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                }

                const uint8x16_t shuf_mask2 = {
                        0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                        0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                };
                xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
                ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

                /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
                const uint64x2_t and_mask0 = {
                        0xFFFFFFFFFFFFFFFF,
                        0x000000000000FFFF,
                };

                dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
                dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
                dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
                dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

                /*
                 * Pick only 16 bits of pktlen present at bits 63:32
                 * and place them at bits 15:0.
                 */
                xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
                ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

                /* Add pairwise to get dataoff + iova in sgdesc_w1 */
                sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
                sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

                /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
                 * pktlen at 15:0 position.
                 */
                sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
                sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
                senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
                senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

                /* Move mbuf pointers to point to pool_id. */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mempool, pool_id));
                if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
                    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
                        /*
                         * Lookup table to translate ol_flags to
                         * il3/il4 types. But we still use ol3/ol4 types in
                         * senddesc_w1 as only one header processing is enabled.
                         */
                        const uint8x16_t tbl = {
                                /* [0-15] = il4type:il3type */
                                0x04, /* none (IPv6 assumed) */
                                0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
                                0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
                                0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
                                0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
                                0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
                                0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
                                0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
                                0x02, /* RTE_MBUF_F_TX_IPV4 */
                                0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
                                0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
                                0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
                                0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
                                0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                                       * RTE_MBUF_F_TX_TCP_CKSUM
                                       */
                                0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                                       * RTE_MBUF_F_TX_SCTP_CKSUM
                                       */
                                0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
                                       * RTE_MBUF_F_TX_UDP_CKSUM
                                       */
                        };

                        /* Extract olflags to translate to iltypes */
                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

                        /*
                         * E(47):L3_LEN(9):L2_LEN(7+z)
                         * E(47):L3_LEN(9):L2_LEN(7+z)
                         */
                        senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
                        senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

                        /* Move OLFLAGS bits 55:52 to 51:48
                         * with zeros prepended on the byte and rest
                         * don't care
                         */
                        xtmp128 = vshrq_n_u8(xtmp128, 4);
                        ytmp128 = vshrq_n_u8(ytmp128, 4);
                        /*
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         * E(48):L3_LEN(8):L2_LEN(z+7)
                         */
                        const int8x16_t tshft3 = {
                                -1, 0, 8, 8, 8, 8, 8, 8,
                                -1, 0, 8, 8, 8, 8, 8, 8,
                        };

                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

                        /* Do the lookup */
                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);

                        /* Pick only relevant fields i.e. bits 48:55 of iltype
                         * and place them in ol3/ol4type of senddesc_w1
                         */
                        const uint8x16_t shuf_mask0 = {
                                0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
                                0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
                        };

                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
                         * a [E(32):E(16):OL3(8):OL2(8)]
                         * a = a + (a << 8)
                         * a [E(32):E(16):(OL3+OL2):OL2]
                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
                         */
                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
                                                 vshlq_n_u16(senddesc01_w1, 8));
                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
                                                 vshlq_n_u16(senddesc23_w1, 8));

                        /* Move ltypes to senddesc*_w1 */
                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1146                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1147                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1148                         /*
1149                          * Lookup table to translate ol_flags to
1150                          * ol3/ol4 types.
1151                          */
1152
1153                         const uint8x16_t tbl = {
1154                                 /* [0-15] = ol4type:ol3type */
1155                                 0x00, /* none */
1156                                 0x03, /* OUTER_IP_CKSUM */
1157                                 0x02, /* OUTER_IPV4 */
1158                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1159                                 0x04, /* OUTER_IPV6 */
1160                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1161                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1162                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1163                                        * OUTER_IP_CKSUM
1164                                        */
1165                                 0x00, /* OUTER_UDP_CKSUM */
1166                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1167                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1168                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1169                                        * OUTER_IP_CKSUM
1170                                        */
1171                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1172                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1173                                        * OUTER_IP_CKSUM
1174                                        */
1175                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1176                                        * OUTER_IPV4
1177                                        */
1178                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1179                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1180                                        */
1181                         };
1182
1183                         /* Extract olflags to translate to iltypes */
1184                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1185                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1186
1187                         /*
1188                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1189                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1190                          */
1191                         const uint8x16_t shuf_mask5 = {
1192                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1193                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1194                         };
1195                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1196                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1197
1198                         /* Extract outer ol flags only */
1199                         const uint64x2_t o_cksum_mask = {
1200                                 0x1C00020000000000,
1201                                 0x1C00020000000000,
1202                         };
1203
1204                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1205                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1206
1207                         /* Copy OUTER_UDP_CKSUM from bit 41
1208                          * to bit 61
1209                          */
1210
1211                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1212                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1213
1214                         /* Shift oltype by 2 to start nibble from BIT(56)
1215                          * instead of BIT(58)
1216                          */
1217                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1218                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1219                         /*
1220                          * E(48):OL3_LEN(8):OL2_LEN(z+7)
1221                          * E(48):OL3_LEN(8):OL2_LEN(z+7)
1222                          */
1223                         const int8x16_t tshft3 = {
1224                                 -1, 0, 8, 8, 8, 8, 8, 8,
1225                                 -1, 0, 8, 8, 8, 8, 8, 8,
1226                         };
1227
1228                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1229                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1230
1231                         /* Do the lookup */
1232                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1233                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1234
1235                         /* Pick only the relevant field, i.e. bits 56:63 of
1236                          * oltype, and place it in ol3/ol4type of senddesc_w1
1237                          */
1238                         const uint8x16_t shuf_mask0 = {
1239                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1240                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1241                         };
1242
1243                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1244                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1245
1246                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1247                          * a [E(32):E(16):OL3(8):OL2(8)]
1248                          * a = a + (a << 8)
1249                          * a [E(32):E(16):(OL3+OL2):OL2]
1250                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
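                              * e.g. with hypothetical lengths OL2 = 14, OL3 = 20:
                              * a = 0x140E and a + (a << 8) = 0x220E, i.e.
                              * OL4PTR = 34 (OL3 + OL2) and OL3PTR = 14 (OL2).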
1251                          */
1252                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1253                                                  vshlq_n_u16(senddesc01_w1, 8));
1254                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1255                                                  vshlq_n_u16(senddesc23_w1, 8));
1256
1257                         /* Move ltypes to senddesc*_w1 */
1258                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1259                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1260                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1261                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1262                         /* Lookup table to translate ol_flags to
1263                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1264                          */
1265                         const uint8x16x2_t tbl = {{
1266                                 {
1267                                         /* [0-15] = il4type:il3type */
1268                                         0x04, /* none (IPv6) */
1269                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1270                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1271                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1272                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1273                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1274                                                * RTE_MBUF_F_TX_TCP_CKSUM
1275                                                */
1276                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
1277                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1278                                                */
1279                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
1280                                                * RTE_MBUF_F_TX_UDP_CKSUM
1281                                                */
1282                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
1283                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
1284                                                * RTE_MBUF_F_TX_TCP_CKSUM
1285                                                */
1286                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
1287                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1288                                                */
1289                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
1290                                                * RTE_MBUF_F_TX_UDP_CKSUM
1291                                                */
1292                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
1293                                                * RTE_MBUF_F_TX_IP_CKSUM
1294                                                */
1295                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1296                                                * RTE_MBUF_F_TX_TCP_CKSUM
1297                                                */
1298                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1299                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1300                                                */
1301                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1302                                                * RTE_MBUF_F_TX_UDP_CKSUM
1303                                                */
1304                                 },
1305
1306                                 {
1307                                         /* [16-31] = ol4type:ol3type */
1308                                         0x00, /* none */
1309                                         0x03, /* OUTER_IP_CKSUM */
1310                                         0x02, /* OUTER_IPV4 */
1311                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1312                                         0x04, /* OUTER_IPV6 */
1313                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1314                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1315                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1316                                                * OUTER_IP_CKSUM
1317                                                */
1318                                         0x00, /* OUTER_UDP_CKSUM */
1319                                         0x33, /* OUTER_UDP_CKSUM |
1320                                                * OUTER_IP_CKSUM
1321                                                */
1322                                         0x32, /* OUTER_UDP_CKSUM |
1323                                                * OUTER_IPV4
1324                                                */
1325                                         0x33, /* OUTER_UDP_CKSUM |
1326                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1327                                                */
1328                                         0x34, /* OUTER_UDP_CKSUM |
1329                                                * OUTER_IPV6
1330                                                */
1331                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1332                                                * OUTER_IP_CKSUM
1333                                                */
1334                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1335                                                * OUTER_IPV4
1336                                                */
1337                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1338                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1339                                                */
1340                                 },
1341                         }};
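                             /*
                              * vqtbl2q_u8 treats this pair as one 32-byte table:
                              * index bytes with bit 4 clear pick from the
                              * il4type:il3type half [0-15], index bytes with
                              * bit 4 set pick from the ol4type:ol3type half
                              * [16-31]. Bit 4 is set on the oltype byte further
                              * below (oi_cksum_mask2).
                              */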
1342
1343                         /* Extract olflags to translate to oltype & iltype */
1344                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1345                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1346
1347                         /*
1348                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1349                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1350                          */
1351                         const uint32x4_t tshft_4 = {
1352                                 1,
1353                                 0,
1354                                 1,
1355                                 0,
1356                         };
1357                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1358                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1359
1360                         /*
1361                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1362                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1363                          */
1364                         const uint8x16_t shuf_mask5 = {
1365                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1366                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1367                         };
1368                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1369                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1370
1371                         /* Extract outer and inner header ol_flags */
1372                         const uint64x2_t oi_cksum_mask = {
1373                                 0x1CF0020000000000,
1374                                 0x1CF0020000000000,
1375                         };
1376
1377                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1378                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1379
1380                         /* Copy OUTER_UDP_CKSUM from bit 41
1381                          * to bit 61
1382                          */
1383
1384                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1385                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1386
1387                         /* Shift right oltype by 2 and iltype by 4
1388                          * to start oltype nibble from BIT(56)
1389                          * instead of BIT(58) and iltype nibble from BIT(48)
1390                          * instead of BIT(52).
1391                          */
1392                         const int8x16_t tshft5 = {
1393                                 8, 8, 8, 8, 8, 8, -4, -2,
1394                                 8, 8, 8, 8, 8, 8, -4, -2,
1395                         };
1396
1397                         xtmp128 = vshlq_u8(xtmp128, tshft5);
1398                         ytmp128 = vshlq_u8(ytmp128, tshft5);
1399                         /*
1400                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1401                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1402                          */
1403                         const int8x16_t tshft3 = {
1404                                 -1, 0, -1, 0, 0, 0, 0, 0,
1405                                 -1, 0, -1, 0, 0, 0, 0, 0,
1406                         };
1407
1408                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1409                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1410
1411                         /* Mark Bit(4) of oltype to pick the second table ([16-31]) in vqtbl2q_u8 */
1412                         const uint64x2_t oi_cksum_mask2 = {
1413                                 0x1000000000000000,
1414                                 0x1000000000000000,
1415                         };
1416
1417                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1418                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1419
1420                         /* Do the lookup */
1421                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1422                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1423
1424                         /* Pick only the relevant fields, i.e. bits 48:55 of
1425                          * iltype and bits 56:63 of oltype, and place them in
1426                          * their respective fields of senddesc_w1.
1427                          */
1428                         const uint8x16_t shuf_mask0 = {
1429                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1430                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1431                         };
1432
1433                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1434                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1435
1436                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1437                          * l3len, l2len, ol3len, ol2len.
1438                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1439                          * a = a + (a << 8)
1440                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1441                          * a = a + (a << 16)
1442                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1443                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
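                              * e.g. with hypothetical lengths OL2 = 14, OL3 = 20,
                              * L2 = 14, L3 = 20: a = 0x140E140E becomes
                              * 0x4430220E, i.e. OL3PTR = 14, OL4PTR = 34,
                              * IL3PTR = 48, IL4PTR = 68 (running length sums).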
1444                          */
1445                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1446                                                  vshlq_n_u32(senddesc01_w1, 8));
1447                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1448                                                  vshlq_n_u32(senddesc23_w1, 8));
1449
1450                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1451                         senddesc01_w1 = vaddq_u8(
1452                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1453                         senddesc23_w1 = vaddq_u8(
1454                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
1455
1456                         /* Move ltypes to senddesc*_w1 */
1457                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1458                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1459                 }
1460
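                     /*
                      * Gather a halfword from each mbuf (mbuf0..3 were set up
                      * earlier in the loop) into per-lane positions and shift
                      * it to bits 20+, which lines up with the aura field of
                      * SEND HDR W0.
                      */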
1461                 xmask01 = vdupq_n_u64(0);
1462                 xmask23 = xmask01;
1463                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1464                              : [a] "+w"(xmask01)
1465                              : [in] "r"(mbuf0)
1466                              : "memory");
1467
1468                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1469                              : [a] "+w"(xmask01)
1470                              : [in] "r"(mbuf1)
1471                              : "memory");
1472
1473                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1474                              : [b] "+w"(xmask23)
1475                              : [in] "r"(mbuf2)
1476                              : "memory");
1477
1478                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1479                              : [b] "+w"(xmask23)
1480                              : [in] "r"(mbuf3)
1481                              : "memory");
1482                 xmask01 = vshlq_n_u64(xmask01, 20);
1483                 xmask23 = vshlq_n_u64(xmask23, 20);
1484
1485                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1486                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1487
1488                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
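                             /*
                              * SEND EXT W1 layout used here: VLAN0 (outer) TCI
                              * at bits 8:23 with its insert-enable at bit 48,
                              * VLAN1 (inner) TCI at bits 32:47 with its
                              * insert-enable at bit 49.
                              */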
1489                         /* Tx ol_flag for vlan. */
1490                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
1491                         /* Bit enable for VLAN1 */
1492                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1493                         /* Tx ol_flag for QinQ. */
1494                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
1495                         /* Bit enable for VLAN0 */
1496                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1497                         /* Load VLAN values from the packets; outer TCI is VLAN0 */
1498                         uint64x2_t ext01 = {
1499                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1500                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1501                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1502                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1503                         };
1504                         uint64x2_t ext23 = {
1505                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1506                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1507                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1508                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1509                         };
1510
1511                         /* Get ol_flags of the packets. */
1512                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1513                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1514
1515                         /* ORR vlan outer/inner values into cmd. */
1516                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1517                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1518
1519                         /* Test for offload enable bits and generate masks. */
1520                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1521                                                       mlv),
1522                                             vandq_u64(vtstq_u64(xtmp128, olq),
1523                                                       mlq));
1524                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1525                                                       mlv),
1526                                             vandq_u64(vtstq_u64(ytmp128, olq),
1527                                                       mlq));
1528
1529                         /* Set vlan enable bits into cmd based on mask. */
1530                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1531                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1532                 }
1533
1534                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1535                         /* Tx ol_flag for timestamp. */
1536                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
1537                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
1538                         /* Set send mem alg to SUB. */
1539                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1540                         /* Increment send mem address by 8. */
1541                         const uint64x2_t addr = {0x8, 0x8};
1542
1543                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1544                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1545
1546                         /* Check if timestamp is requested and generate inverted
1547                          * mask as we need not make any changes to default cmd
1548                          * value.
1549                          */
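                             /*
                              * For lanes without the flag, the inverted mask
                              * redirects the SEND MEM write to a slot 8 bytes
                              * past the Tx timestamp word and switches the alg
                              * to SUB, so the latched timestamp is not
                              * clobbered (assumed layout of the tstamp memory;
                              * the default cmd is built for the enabled case).
                              */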
1550                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1551                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1552
1553                         /* Change send mem address to an 8 byte offset when
1554                          * TSTMP is disabled.
1555                          */
1556                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
1557                                                  vandq_u64(xtmp128, addr));
1558                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
1559                                                  vandq_u64(ytmp128, addr));
1560                         /* Change send mem alg to SUB when TSTMP is disabled. */
1561                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
1562                                                  vandq_u64(xtmp128, alg));
1563                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
1564                                                  vandq_u64(ytmp128, alg));
1565
1566                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1567                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1568                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1569                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1570                 }
1571
1572                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1573                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1574                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1575
1576                         /* Extract SD W1 as we need to set L4 types. */
1577                         vst1q_u64(sd_w1, senddesc01_w1);
1578                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
1579
1580                         /* Extract SX W0 as we need to set LSO fields. */
1581                         vst1q_u64(sx_w0, sendext01_w0);
1582                         vst1q_u64(sx_w0 + 2, sendext23_w0);
1583
1584                         /* Extract ol_flags. */
1585                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1586                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1587
1588                         /* Prepare individual mbufs. */
1589                         cn9k_nix_prepare_tso(tx_pkts[0],
1590                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
1591                                 (union nix_send_ext_w0_u *)&sx_w0[0],
1592                                 vgetq_lane_u64(xtmp128, 0), flags);
1593
1594                         cn9k_nix_prepare_tso(tx_pkts[1],
1595                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
1596                                 (union nix_send_ext_w0_u *)&sx_w0[1],
1597                                 vgetq_lane_u64(xtmp128, 1), flags);
1598
1599                         cn9k_nix_prepare_tso(tx_pkts[2],
1600                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
1601                                 (union nix_send_ext_w0_u *)&sx_w0[2],
1602                                 vgetq_lane_u64(ytmp128, 0), flags);
1603
1604                         cn9k_nix_prepare_tso(tx_pkts[3],
1605                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
1606                                 (union nix_send_ext_w0_u *)&sx_w0[3],
1607                                 vgetq_lane_u64(ytmp128, 1), flags);
1608
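                             /* Reload the per-packet fixed-up words into the
                              * vector registers.
                              */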
1609                         senddesc01_w1 = vld1q_u64(sd_w1);
1610                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1611
1612                         sendext01_w0 = vld1q_u64(sx_w0);
1613                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
1614                 }
1615
1616                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1617                     !(flags & NIX_TX_MULTI_SEG_F)) {
1618                         /* Set don't free bit if reference count > 1 */
1619                         xmask01 = vdupq_n_u64(0);
1620                         xmask23 = xmask01;
1621
1622                         /* Move mbufs to iova */
1623                         mbuf0 = (uint64_t *)tx_pkts[0];
1624                         mbuf1 = (uint64_t *)tx_pkts[1];
1625                         mbuf2 = (uint64_t *)tx_pkts[2];
1626                         mbuf3 = (uint64_t *)tx_pkts[3];
1627
1628                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1629                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
1630                         else
1631                                 RTE_MEMPOOL_CHECK_COOKIES(
1632                                         ((struct rte_mbuf *)mbuf0)->pool,
1633                                         (void **)&mbuf0, 1, 0);
1634
1635                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1636                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
1637                         else
1638                                 RTE_MEMPOOL_CHECK_COOKIES(
1639                                         ((struct rte_mbuf *)mbuf1)->pool,
1640                                         (void **)&mbuf1, 1, 0);
1641
1642                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1643                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
1644                         else
1645                                 RTE_MEMPOOL_CHECK_COOKIES(
1646                                         ((struct rte_mbuf *)mbuf2)->pool,
1647                                         (void **)&mbuf2, 1, 0);
1648
1649                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1650                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
1651                         else
1652                                 RTE_MEMPOOL_CHECK_COOKIES(
1653                                         ((struct rte_mbuf *)mbuf3)->pool,
1654                                         (void **)&mbuf3, 1, 0);
1655                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1656                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1657                         /* Ensure mbuf fields updated in
1658                          * cnxk_nix_prefree_seg are written before the LMTST.
1659                          */
1660                         rte_io_wmb();
1661                 } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
1662                         /* Move mbufs to iova */
1663                         mbuf0 = (uint64_t *)tx_pkts[0];
1664                         mbuf1 = (uint64_t *)tx_pkts[1];
1665                         mbuf2 = (uint64_t *)tx_pkts[2];
1666                         mbuf3 = (uint64_t *)tx_pkts[3];
1667
1668                         /* Mark mempool object as "put" since
1669                          * it is freed by NIX
1670                          */
1671                         RTE_MEMPOOL_CHECK_COOKIES(
1672                                 ((struct rte_mbuf *)mbuf0)->pool,
1673                                 (void **)&mbuf0, 1, 0);
1674
1675                         RTE_MEMPOOL_CHECK_COOKIES(
1676                                 ((struct rte_mbuf *)mbuf1)->pool,
1677                                 (void **)&mbuf1, 1, 0);
1678
1679                         RTE_MEMPOOL_CHECK_COOKIES(
1680                                 ((struct rte_mbuf *)mbuf2)->pool,
1681                                 (void **)&mbuf2, 1, 0);
1682
1683                         RTE_MEMPOOL_CHECK_COOKIES(
1684                                 ((struct rte_mbuf *)mbuf3)->pool,
1685                                 (void **)&mbuf3, 1, 0);
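                             /*
                              * RTE_MEMPOOL_CHECK_COOKIES compiles to a no-op
                              * unless RTE_LIBRTE_MEMPOOL_DEBUG is defined, so
                              * the barrier below is only needed in debug builds.
                              */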
1686 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1687                         rte_io_wmb();
1688 #endif
1689                 }
1690
1691                 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1692                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1693                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1694                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1695                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1696
1697                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1698                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1699                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1700                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1701
1702                 if (flags & NIX_TX_NEED_EXT_HDR) {
1703                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1704                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1705                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1706                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1707                 }
1708
1709                 if (flags & NIX_TX_MULTI_SEG_F) {
1710                         uint64_t seg_list[NIX_DESCS_PER_LOOP]
1711                                          [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
1712                         uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
1713
1714                         /* Build mseg list for each packet individually. */
1715                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1716                                 segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
1717                                                         seg_list[j], &cmd0[j],
1718                                                         &cmd1[j], flags);
1719                         segdw[4] = 8;
1720
1721                         /* Commit all changes to mbuf before LMTST. */
1722                         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1723                                 rte_io_wmb();
1724
1725                         cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
1726                                                        segdw, seg_list,
1727                                                        lmt_addr, io_addr,
1728                                                        flags);
1729                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1730                         /* With the ext header in the command we can no
1731                          * longer send all 4 packets together since the
1732                          * LMTLINE is 128 bytes. Split and Tx twice.
1733                          */
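                             /*
                              * roc_lmt_submit_ldeor() returns the LDEOR status;
                              * zero means the LMTST did not complete (e.g. the
                              * LMT line was cancelled between the stores), so
                              * the line is rebuilt and submitted again.
                              */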
1734                         do {
1735                                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1736                                         vst1q_u64(lmt_addr, cmd0[0]);
1737                                         vst1q_u64(lmt_addr + 2, cmd2[0]);
1738                                         vst1q_u64(lmt_addr + 4, cmd1[0]);
1739                                         vst1q_u64(lmt_addr + 6, cmd3[0]);
1740                                         vst1q_u64(lmt_addr + 8, cmd0[1]);
1741                                         vst1q_u64(lmt_addr + 10, cmd2[1]);
1742                                         vst1q_u64(lmt_addr + 12, cmd1[1]);
1743                                         vst1q_u64(lmt_addr + 14, cmd3[1]);
1744                                 } else {
1745                                         vst1q_u64(lmt_addr, cmd0[0]);
1746                                         vst1q_u64(lmt_addr + 2, cmd2[0]);
1747                                         vst1q_u64(lmt_addr + 4, cmd1[0]);
1748                                         vst1q_u64(lmt_addr + 6, cmd0[1]);
1749                                         vst1q_u64(lmt_addr + 8, cmd2[1]);
1750                                         vst1q_u64(lmt_addr + 10, cmd1[1]);
1751                                 }
1752                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1753                         } while (lmt_status == 0);
1754
1755                         do {
1756                                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1757                                         vst1q_u64(lmt_addr, cmd0[2]);
1758                                         vst1q_u64(lmt_addr + 2, cmd2[2]);
1759                                         vst1q_u64(lmt_addr + 4, cmd1[2]);
1760                                         vst1q_u64(lmt_addr + 6, cmd3[2]);
1761                                         vst1q_u64(lmt_addr + 8, cmd0[3]);
1762                                         vst1q_u64(lmt_addr + 10, cmd2[3]);
1763                                         vst1q_u64(lmt_addr + 12, cmd1[3]);
1764                                         vst1q_u64(lmt_addr + 14, cmd3[3]);
1765                                 } else {
1766                                         vst1q_u64(lmt_addr, cmd0[2]);
1767                                         vst1q_u64(lmt_addr + 2, cmd2[2]);
1768                                         vst1q_u64(lmt_addr + 4, cmd1[2]);
1769                                         vst1q_u64(lmt_addr + 6, cmd0[3]);
1770                                         vst1q_u64(lmt_addr + 8, cmd2[3]);
1771                                         vst1q_u64(lmt_addr + 10, cmd1[3]);
1772                                 }
1773                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1774                         } while (lmt_status == 0);
1775                 } else {
1776                         do {
1777                                 vst1q_u64(lmt_addr, cmd0[0]);
1778                                 vst1q_u64(lmt_addr + 2, cmd1[0]);
1779                                 vst1q_u64(lmt_addr + 4, cmd0[1]);
1780                                 vst1q_u64(lmt_addr + 6, cmd1[1]);
1781                                 vst1q_u64(lmt_addr + 8, cmd0[2]);
1782                                 vst1q_u64(lmt_addr + 10, cmd1[2]);
1783                                 vst1q_u64(lmt_addr + 12, cmd0[3]);
1784                                 vst1q_u64(lmt_addr + 14, cmd1[3]);
1785                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1786                         } while (lmt_status == 0);
1787                 }
1788                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1789         }
1790
1791         if (unlikely(pkts_left)) {
1792                 if (flags & NIX_TX_MULTI_SEG_F)
1793                         pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
1794                                                         pkts_left, cmd, flags);
1795                 else
1796                         pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
1797                                                    cmd, flags);
1798         }
1799
1800         return pkts;
1801 }
1802
1803 #else
1804 static __rte_always_inline uint16_t
1805 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1806                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1807 {
1808         RTE_SET_USED(tx_queue);
1809         RTE_SET_USED(tx_pkts);
1810         RTE_SET_USED(pkts);
1811         RTE_SET_USED(cmd);
1812         RTE_SET_USED(flags);
1813         return 0;
1814 }
1815 #endif
1816
1817 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
1818 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
1819 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
1820 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
1821 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
1822 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
1823 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
1824
1825 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
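/* In each T(name, sz, flags) entry below, sz is the number of 8-byte cmd
 * words the mode needs: 4 for SEND HDR + SG only, 6 once a SEND EXT
 * sub-descriptor is required (VLAN/TSO), and 8 when SEND MEM is also
 * present (timestamp).
 */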
1826 #define NIX_TX_FASTPATH_MODES_0_15                                             \
1827         T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                  \
1828         T(l3l4csum, 4, L3L4CSUM_F)                                             \
1829         T(ol3ol4csum, 4, OL3OL4CSUM_F)                                         \
1830         T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                   \
1831         T(vlan, 6, VLAN_F)                                                     \
1832         T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
1833         T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
1834         T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1835         T(noff, 4, NOFF_F)                                                     \
1836         T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                               \
1837         T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                           \
1838         T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1839         T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
1840         T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
1841         T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
1842         T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
1843           NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1844
1845 #define NIX_TX_FASTPATH_MODES_16_31                                            \
1846         T(tso, 6, TSO_F)                                                       \
1847         T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
1848         T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
1849         T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
1850         T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
1851         T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
1852         T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
1853         T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
1854           TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1855         T(tso_noff, 6, TSO_F | NOFF_F)                                         \
1856         T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
1857         T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
1858         T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
1859           TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1860         T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
1861         T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
1862         T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1863         T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
1864           TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1865
1866 #define NIX_TX_FASTPATH_MODES_32_47                                            \
1867         T(ts, 8, TSP_F)                                                        \
1868         T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
1869         T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
1870         T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
1871         T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
1872         T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
1873         T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
1874         T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
1875           TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1876         T(ts_noff, 8, TSP_F | NOFF_F)                                          \
1877         T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
1878         T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
1879         T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
1880           TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1881         T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
1882         T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
1883         T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
1884         T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
1885           TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1886
1887 #define NIX_TX_FASTPATH_MODES_48_63                                            \
1888         T(ts_tso, 8, TSP_F | TSO_F)                                            \
1889         T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
1890         T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
1891         T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
1892           TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
1893         T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
1894         T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
1895         T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
1896         T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
1897           TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
1898         T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
1899         T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
1900         T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
1901         T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
1902           TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
1903         T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
1904         T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
1905           TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
1906         T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
1907           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
1908         T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
1909           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1910
1911 #define NIX_TX_FASTPATH_MODES_64_79                                            \
1912         T(sec, 4, T_SEC_F)                                                     \
1913         T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                               \
1914         T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                           \
1915         T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1916         T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
1917         T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
1918         T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
1919         T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
1920           T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
1921         T(sec_noff, 4, T_SEC_F | NOFF_F)                                       \
1922         T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
1923         T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
1924         T(sec_noff_ol3ol4csum_l3l4csum, 4,                                     \
1925           T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
1926         T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
1927         T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
1928         T(sec_noff_vlan_ol3ol4csum, 6,                                         \
1929           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
1930         T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
1931           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1932
1933 #define NIX_TX_FASTPATH_MODES_80_95                                            \
1934         T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
1935         T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
1936         T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
1937         T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
1938           T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
1939         T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
1940         T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
1941         T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
1942         T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
1943           T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
1944         T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
1945         T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
1946         T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
1947         T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
1948           T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
1949         T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
1950         T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
1951           T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
1952         T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
1953           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
1954         T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
1955           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1956
1957 #define NIX_TX_FASTPATH_MODES_96_111                                           \
1958         T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
1959         T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
1960         T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
1961         T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
1962           T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
1963         T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
1964         T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
1965         T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
1966         T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
1967           T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
1968         T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
1969         T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
1970         T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
1971         T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
1972           T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
1973         T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
1974         T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
1975           T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
1976         T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
1977           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
1978         T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
1979           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1980
1981 #define NIX_TX_FASTPATH_MODES_112_127                                          \
1982         T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
1983         T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
1984         T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
1985         T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
1986           T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
1987         T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
1988         T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
1989           T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
1990         T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
1991           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
1992         T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
1993           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
1994         T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
1995         T(sec_ts_tso_noff_l3l4csum, 8,                                         \
1996           T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
1997         T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
1998           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
1999         T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2000           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2001         T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2002         T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2003           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2004         T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2005           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2006         T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2007           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2008                   L3L4CSUM_F)
2009
2010 #define NIX_TX_FASTPATH_MODES                                                  \
2011         NIX_TX_FASTPATH_MODES_0_15                                             \
2012         NIX_TX_FASTPATH_MODES_16_31                                            \
2013         NIX_TX_FASTPATH_MODES_32_47                                            \
2014         NIX_TX_FASTPATH_MODES_48_63                                            \
2015         NIX_TX_FASTPATH_MODES_64_79                                            \
2016         NIX_TX_FASTPATH_MODES_80_95                                            \
2017         NIX_TX_FASTPATH_MODES_96_111                                           \
2018         NIX_TX_FASTPATH_MODES_112_127
2019
2020 #define T(name, sz, flags)                                                     \
2021         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(           \
2022                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2023         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name(      \
2024                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2025         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \
2026                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2027         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \
2028                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2029
2030 NIX_TX_FASTPATH_MODES
2031 #undef T
2032
2033 #define NIX_TX_XMIT(fn, sz, flags)                                             \
2034         uint16_t __rte_noinline __rte_hot fn(                                  \
2035                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2036         {                                                                      \
2037                 uint64_t cmd[sz];                                              \
2038                 /* For TSO inner checksum is a must */                         \
2039                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2040                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2041                         return 0;                                              \
2042                 return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,        \
2043                                           flags);                              \
2044         }
2045
2046 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
2047         uint16_t __rte_noinline __rte_hot fn(                                  \
2048                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2049         {                                                                      \
2050                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2051                 /* For TSO inner checksum is a must */                         \
2052                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2053                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2054                         return 0;                                              \
2055                 return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,   \
2056                                                (flags) | NIX_TX_MULTI_SEG_F);  \
2057         }
2058
2059 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
2060         uint16_t __rte_noinline __rte_hot fn(                                  \
2061                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2062         {                                                                      \
2063                 uint64_t cmd[sz];                                              \
2064                 /* For TSO inner checksum is a must */                         \
2065                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2066                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2067                         return 0;                                              \
2068                 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2069                                                  (flags));                     \
2070         }
2071
2072 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
2073         uint16_t __rte_noinline __rte_hot fn(                                  \
2074                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2075         {                                                                      \
2076                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2077                 /* For TSO inner checksum is a must */                         \
2078                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2079                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2080                         return 0;                                              \
2081                 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2082                                                  (flags) |                     \
2083                                                          NIX_TX_MULTI_SEG_F);  \
2084         }
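/* A .c translation unit is expected to pair the prototypes above with these
 * helpers, along the lines of this illustrative sketch:
 *
 *   #define T(name, sz, flags)                                           \
 *           NIX_TX_XMIT(cn9k_nix_xmit_pkts_##name, sz, flags)
 *   NIX_TX_FASTPATH_MODES
 *   #undef T
 */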
2085
2086 #endif /* __CN9K_TX_H__ */