net/cnxk: add SoC specific PTP timestamp read
[dpdk.git] / drivers / net / cnxk / cn9k_tx.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2021 Marvell.
3  */
4 #ifndef __CN9K_TX_H__
5 #define __CN9K_TX_H__
6
7 #include <rte_vect.h>
8
9 #define NIX_TX_OFFLOAD_NONE           (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F          BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
16 #define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
17 #define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)
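/* The offload flags above form a bitmask used to pick a specialized Tx
 * function variant at runtime; NIX_TX_OFFLOAD_MAX (BIT(7)) bounds that
 * flag space, e.g. for sizing a per-combination function lookup table.
 */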
18
19 /* Flags to control the xmit_prepare function.
20  * Defined from the top bit downwards to denote that it is
21  * not used as an offload flag to pick the Tx function.
22  */
23 #define NIX_TX_MULTI_SEG_F BIT(15)
24
25 #define NIX_TX_NEED_SEND_HDR_W1                                                \
26         (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
27          NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
28
29 #define NIX_TX_NEED_EXT_HDR                                                    \
30         (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
31          NIX_TX_OFFLOAD_TSO_F)
32
33 #define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
34         do {                                                                   \
35                 /* Cached value is low, update the fc_cache_pkts */            \
36                 if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
37                         /* Multiply with sqe_per_sqb to express in pkts */     \
38                         (txq)->fc_cache_pkts =                                 \
39                                 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
40                                 << (txq)->sqes_per_sqb_log2;                   \
41                         /* Check it again for room */                          \
42                         if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
43                                 return 0;                                      \
44                 }                                                              \
45         } while (0)
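/* Worked example with illustrative values: with nb_sqb_bufs_adj = 1024,
 * *fc_mem = 1000 (HW-updated count of consumed SQBs) and
 * sqes_per_sqb_log2 = 5 (32 SQEs per SQB), fc_cache_pkts becomes
 * (1024 - 1000) << 5 = 768 packets of room before fc_mem is re-read.
 */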
46
47 /* Function to determine the number of extra Tx subdescriptors
48  * required when the extended subdescriptor is enabled.
49  */
50 static __rte_always_inline int
51 cn9k_nix_tx_ext_subs(const uint16_t flags)
52 {
53         return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
54                        ? 2
55                        : ((flags &
56                            (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
57                                   ? 1
58                                   : 0);
59 }
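/* i.e. TSTAMP needs two extra subdescriptors (SEND_EXT + SEND_MEM),
 * VLAN/QinQ or TSO needs one (SEND_EXT), and the default case needs
 * none beyond SEND_HDR + SEND_SG; see cn9k_nix_tx_skeleton() below.
 */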
60
61 static __rte_always_inline void
62 cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
63                      const uint16_t flags, const uint16_t static_sz)
64 {
65         if (static_sz)
66                 cmd[0] = txq->send_hdr_w0;
67         else
68                 cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
69                          ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
70         cmd[1] = 0;
71
72         if (flags & NIX_TX_NEED_EXT_HDR) {
73                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
74                         cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
75                 else
76                         cmd[2] = NIX_SUBDC_EXT << 60;
77                 cmd[3] = 0;
78                 cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
79         } else {
80                 cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
81         }
82 }
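/* Resulting cmd[] layout in 8-byte words: cmd[0..1] = SEND_HDR, then
 * either cmd[2..3] = SEND_EXT and cmd[4..5] = SEND_SG + IOVA (extended
 * header path) or cmd[2..3] = SEND_SG + IOVA directly.
 */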
83
84 static __rte_always_inline void
85 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
86 {
87         uint64_t mask, ol_flags = m->ol_flags;
88
89         if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
90                 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
91                 uint16_t *iplen, *oiplen, *oudplen;
92                 uint16_t lso_sb, paylen;
93
94                 mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
95                 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
96                          m->l2_len + m->l3_len + m->l4_len;
97
98                 /* Payload length excluding the LSO header bytes */
99                 paylen = m->pkt_len - lso_sb;
100
101                 /* Get iplen position assuming no tunnel hdr */
102                 iplen = (uint16_t *)(mdata + m->l2_len +
103                                      (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
104                 /* Handle tunnel tso */
105                 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
106                     (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
107                         const uint8_t is_udp_tun =
108                                 (CNXK_NIX_UDP_TUN_BITMASK >>
109                                  ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
110                                 0x1;
111
112                         oiplen = (uint16_t *)(mdata + m->outer_l2_len +
113                                               (2 << !!(ol_flags &
114                                                        RTE_MBUF_F_TX_OUTER_IPV6)));
115                         *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
116                                                    paylen);
117
118                         /* Update outer UDP header length for UDP tunnel */
119                         if (is_udp_tun) {
120                                 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
121                                                        m->outer_l3_len + 4);
122                                 *oudplen = rte_cpu_to_be_16(
123                                         rte_be_to_cpu_16(*oudplen) - paylen);
124                         }
125
126                         /* Update iplen position to inner ip hdr */
127                         iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
128                                              m->l4_len +
129                                              (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
130                 }
131
132                 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
133         }
134 }
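/* The (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)) term above selects the
 * length field offset within the IP header: byte 2 (Total Length) for
 * IPv4 versus byte 4 (Payload Length) for IPv6. For example, a plain
 * IPv4 packet with l2_len = 14 has iplen pointing at mdata + 16.
 */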
135
136 static __rte_always_inline void
137 cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
138                       const uint64_t lso_tun_fmt)
139 {
140         struct nix_send_ext_s *send_hdr_ext;
141         struct nix_send_hdr_s *send_hdr;
142         uint64_t ol_flags = 0, mask;
143         union nix_send_hdr_w1_u w1;
144         union nix_send_sg_s *sg;
145
146         send_hdr = (struct nix_send_hdr_s *)cmd;
147         if (flags & NIX_TX_NEED_EXT_HDR) {
148                 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
149                 sg = (union nix_send_sg_s *)(cmd + 4);
150                 /* Clear previous markings */
151                 send_hdr_ext->w0.lso = 0;
152                 send_hdr_ext->w1.u = 0;
153         } else {
154                 sg = (union nix_send_sg_s *)(cmd + 2);
155         }
156
157         if (flags & NIX_TX_NEED_SEND_HDR_W1) {
158                 ol_flags = m->ol_flags;
159                 w1.u = 0;
160         }
161
162         if (!(flags & NIX_TX_MULTI_SEG_F))
163                 send_hdr->w0.total = m->data_len;
164         else
165                 send_hdr->w0.total = m->pkt_len;
166         send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
167
168         /*
169          * L3type:  2 => IPV4
170          *          3 => IPV4 with csum
171          *          4 => IPV6
172          * L3type and L3ptr need to be set for
173          * L3 csum, L4 csum, or LSO.
174          *
175          */
176
177         if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
178             (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
179                 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
180                 const uint8_t ol3type =
181                         ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
182                         ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
183                         !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
184
185                 /* Outer L3 */
186                 w1.ol3type = ol3type;
187                 mask = 0xffffull << ((!!ol3type) << 4);
188                 w1.ol3ptr = ~mask & m->outer_l2_len;
189                 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
190
191                 /* Outer L4 */
192                 w1.ol4type = csum + (csum << 1);
193
194                 /* Inner L3 */
195                 w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
196                              ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
197                 w1.il3ptr = w1.ol4ptr + m->l2_len;
198                 w1.il4ptr = w1.il3ptr + m->l3_len;
199                 /* Increment it by 1 if it is IPV4 as 3 is with csum */
200                 w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
201
202                 /* Inner L4 */
203                 w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
204
205                 /* In case of no tunnel header, shift the
206                  * IL3/IL4 fields into OL3/OL4 so that the
207                  * single header's checksum uses OL3/OL4.
208                  */
209                 mask = !ol3type;
210                 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
211                        ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
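                /* When there is no outer header (mask = 1), the inner
                 * type nibbles in the upper 32 bits shift right by 8
                 * into the OL type slots, and the inner pointers in the
                 * lower 32 bits shift right by 16 into the OL pointer
                 * slots; with an outer header (mask = 0) this is a no-op.
                 */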
212
213         } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
214                 const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
215                 const uint8_t outer_l2_len = m->outer_l2_len;
216
217                 /* Outer L3 */
218                 w1.ol3ptr = outer_l2_len;
219                 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
220                 /* Increment it by 1 if it is IPV4 as 3 is with csum */
221                 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
222                              ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
223                              !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
224
225                 /* Outer L4 */
226                 w1.ol4type = csum + (csum << 1);
227
228         } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
229                 const uint8_t l2_len = m->l2_len;
230
231         /* Always use OLXPTR and OLXTYPE when
232          * only one header is present.
233          */
234
235                 /* Inner L3 */
236                 w1.ol3ptr = l2_len;
237                 w1.ol4ptr = l2_len + m->l3_len;
238                 /* Increment it by 1 if it is IPV4 as 3 is with csum */
239                 w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
240                              ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
241                              !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
242
243                 /* Inner L4 */
244                 w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
245         }
246
247         if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
248                 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
249                 /* HW will update ptr after vlan0 update */
250                 send_hdr_ext->w1.vlan1_ins_ptr = 12;
251                 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
252
253                 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
254                 /* 2B before end of l2 header */
255                 send_hdr_ext->w1.vlan0_ins_ptr = 12;
256                 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
257         }
258
259         if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
260                 uint16_t lso_sb;
261                 uint64_t mask;
262
263                 mask = -(!w1.il3type);
264                 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
265
266                 send_hdr_ext->w0.lso_sb = lso_sb;
267                 send_hdr_ext->w0.lso = 1;
268                 send_hdr_ext->w0.lso_mps = m->tso_segsz;
269                 send_hdr_ext->w0.lso_format =
270                         NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
271                 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
272
273                 /* Handle tunnel tso */
274                 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
275                     (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
276                         const uint8_t is_udp_tun =
277                                 (CNXK_NIX_UDP_TUN_BITMASK >>
278                                  ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
279                                 0x1;
280                         uint8_t shift = is_udp_tun ? 32 : 0;
281
282                         shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
283                         shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
284
285                         w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
286                         w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
287                         /* Update format for UDP tunneled packet */
288                         send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
289                 }
290         }
291
292         if (flags & NIX_TX_NEED_SEND_HDR_W1)
293                 send_hdr->w1.u = w1.u;
294
295         if (!(flags & NIX_TX_MULTI_SEG_F)) {
296                 sg->seg1_size = m->data_len;
297                 *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);
298
299                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
300                         /* DF bit = 1 if refcount of current mbuf or parent mbuf
301                          *              is greater than 1
302                          * DF bit = 0 otherwise
303                          */
304                         send_hdr->w0.df = cnxk_nix_prefree_seg(m);
305                         /* Ensuring mbuf fields which got updated in
306                          * cnxk_nix_prefree_seg are written before LMTST.
307                          */
308                         rte_io_wmb();
309                 }
310                 /* Mark mempool object as "put" since it is freed by NIX */
311                 if (!send_hdr->w0.df)
312                         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
313         } else {
314                 sg->seg1_size = m->data_len;
315                 *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
316
317                 /* NOFF is handled later for multi-seg */
318         }
319 }
320
321 static __rte_always_inline void
322 cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
323                              const uint64_t ol_flags, const uint16_t no_segdw,
324                              const uint16_t flags)
325 {
326         if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
327                 struct nix_send_mem_s *send_mem;
328                 uint16_t off = (no_segdw - 1) << 1;
329                 const uint8_t is_ol_tstamp =
330                         !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
331
332                 send_mem = (struct nix_send_mem_s *)(cmd + off);
333
334                 /* For packets without RTE_MBUF_F_TX_IEEE1588_TMST set, the Tx
335                  * tstamp should not be recorded. Hence change the alg type to
336                  * NIX_SENDMEMALG_SUB and point the send mem addr field to the
337                  * next 8 bytes, so that the actual Tx tstamp registered
338                  * address is not corrupted.
339                  */
340                 send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
341                 send_mem->w0.cn9k.alg =
342                         NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
343
344                 send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
345                                 (is_ol_tstamp));
346         }
347 }
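/* NIX_SENDMEMALG_SETTSTMP + BIT(3) yields the subtract alg, and adding
 * one uint64_t to txq->ts_mem points HW at the adjacent scratch word,
 * so the "no timestamp" case still issues a harmless SEND_MEM.
 */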
348
349 static __rte_always_inline void
350 cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
351                   const uint32_t flags)
352 {
353         uint64_t lmt_status;
354
355         do {
356                 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
357                 lmt_status = roc_lmt_submit_ldeor(io_addr);
358         } while (lmt_status == 0);
359 }
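/* roc_lmt_submit_ldeor() returns zero when the LMTST did not complete
 * (the LMT region contents can be lost, e.g. across an exception), so
 * the copy + submit pair is retried until the doorbell sticks.
 */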
360
361 static __rte_always_inline void
362 cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
363 {
364         roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
365 }
366
367 static __rte_always_inline uint64_t
368 cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
369 {
370         return roc_lmt_submit_ldeor(io_addr);
371 }
372
373 static __rte_always_inline uint64_t
374 cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
375 {
376         return roc_lmt_submit_ldeorl(io_addr);
377 }
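/* The _release variant uses LDEORL, the release-ordered form of LDEOR,
 * so prior stores (e.g. mbuf field updates) are observable before the
 * LMTST is issued.
 */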
378
379 static __rte_always_inline uint16_t
380 cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
381 {
382         struct nix_send_hdr_s *send_hdr;
383         union nix_send_sg_s *sg;
384         struct rte_mbuf *m_next;
385         uint64_t *slist, sg_u;
386         uint64_t nb_segs;
387         uint64_t segdw;
388         uint8_t off, i;
389
390         send_hdr = (struct nix_send_hdr_s *)cmd;
391
392         if (flags & NIX_TX_NEED_EXT_HDR)
393                 off = 2;
394         else
395                 off = 0;
396
397         sg = (union nix_send_sg_s *)&cmd[2 + off];
398
399         /* Start from second segment, first segment is already there */
400         i = 1;
401         sg_u = sg->u;
402         nb_segs = m->nb_segs - 1;
403         m_next = m->next;
404         slist = &cmd[3 + off + 1];
405
406         /* Set invert df if buffer is not to be freed by H/W */
407         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
408                 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
409                 rte_io_wmb();
410         }
411
412         /* Mark mempool object as "put" since it is freed by NIX */
413 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
414         if (!(sg_u & (1ULL << 55)))
415                 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
416         rte_io_wmb();
417 #endif
418         m = m_next;
419         if (!m)
420                 goto done;
421
422         /* Fill mbuf segments */
423         do {
424                 m_next = m->next;
425                 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
426                 *slist = rte_mbuf_data_iova(m);
427                 /* Set invert df if buffer is not to be freed by H/W */
428                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
429                         sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
430                         /* Commit changes to mbuf */
431                         rte_io_wmb();
432                 }
433                 /* Mark mempool object as "put" since it is freed by NIX */
434 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
435                 if (!(sg_u & (1ULL << (i + 55))))
436                         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
437                 rte_io_wmb();
438 #endif
439                 slist++;
440                 i++;
441                 nb_segs--;
442                 if (i > 2 && nb_segs) {
443                         i = 0;
444                         /* Next SG subdesc */
445                         *(uint64_t *)slist = sg_u & 0xFC00000000000000;
446                         sg->u = sg_u;
447                         sg->segs = 3;
448                         sg = (union nix_send_sg_s *)slist;
449                         sg_u = sg->u;
450                         slist++;
451                 }
452                 m = m_next;
453         } while (nb_segs);
454
455 done:
456         sg->u = sg_u;
457         sg->segs = i;
458         segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
459         /* Roundup extra dwords to multiple of 2 */
460         segdw = (segdw >> 1) + (segdw & 0x1);
461         /* Default dwords */
462         segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
463         send_hdr->w0.sizem1 = segdw - 1;
464
465         return segdw;
466 }
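/* The returned segdw is in 16-byte (2-dword) units. Illustrative case:
 * 3 segments and no ext header take SEND_HDR (2 dwords) plus the SG
 * word and 3 IOVAs (4 dwords), i.e. 6 dwords -> segdw = 3, sizem1 = 2.
 */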
467
468 static __rte_always_inline void
469 cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
470 {
471         roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
472 }
473
474 static __rte_always_inline void
475 cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
476                        uint16_t segdw)
477 {
478         uint64_t lmt_status;
479
480         do {
481                 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
482                 lmt_status = roc_lmt_submit_ldeor(io_addr);
483         } while (lmt_status == 0);
484 }
485
486 static __rte_always_inline void
487 cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
488                                rte_iova_t io_addr, uint16_t segdw)
489 {
490         uint64_t lmt_status;
491
492         rte_io_wmb();
493         do {
494                 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
495                 lmt_status = roc_lmt_submit_ldeor(io_addr);
496         } while (lmt_status == 0);
497 }
498
499 static __rte_always_inline uint16_t
500 cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
501                    uint64_t *cmd, const uint16_t flags)
502 {
503         struct cn9k_eth_txq *txq = tx_queue;
504         const rte_iova_t io_addr = txq->io_addr;
505         void *lmt_addr = txq->lmt_addr;
506         uint64_t lso_tun_fmt;
507         uint16_t i;
508
509         NIX_XMIT_FC_OR_RETURN(txq, pkts);
510
511         cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
512
513         /* Perform header writes before barrier for TSO */
514         if (flags & NIX_TX_OFFLOAD_TSO_F) {
515                 lso_tun_fmt = txq->lso_tun_fmt;
516
517                 for (i = 0; i < pkts; i++)
518                         cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
519         }
520
521         /* Let's commit any changes in the packets here, as no further
522          * changes will be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F is set.
523          */
524         if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
525                 rte_io_wmb();
526
527         for (i = 0; i < pkts; i++) {
528                 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
529                 cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
530                                              flags);
531                 cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
532         }
533
534         /* Reduce the cached count */
535         txq->fc_cache_pkts -= pkts;
536
537         return pkts;
538 }
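/* Note that the single cmd[] buffer is reused across the burst: the
 * skeleton is written once above and each iteration rewrites only the
 * per-packet fields before the LMTST copy.
 */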
539
540 static __rte_always_inline uint16_t
541 cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
542                         uint16_t pkts, uint64_t *cmd, const uint16_t flags)
543 {
544         struct cn9k_eth_txq *txq = tx_queue;
545         const rte_iova_t io_addr = txq->io_addr;
546         void *lmt_addr = txq->lmt_addr;
547         uint64_t lso_tun_fmt;
548         uint16_t segdw;
549         uint64_t i;
550
551         NIX_XMIT_FC_OR_RETURN(txq, pkts);
552
553         cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
554
555         /* Perform header writes before barrier for TSO */
556         if (flags & NIX_TX_OFFLOAD_TSO_F) {
557                 lso_tun_fmt = txq->lso_tun_fmt;
558
559                 for (i = 0; i < pkts; i++)
560                         cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
561         }
562
563         /* Let's commit any changes in the packets here, as no further
564          * changes will be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F is set.
565          */
566         if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
567                 rte_io_wmb();
568
569         for (i = 0; i < pkts; i++) {
570                 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
571                 segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
572                 cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
573                                              segdw, flags);
574                 cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
575         }
576
577         /* Reduce the cached count */
578         txq->fc_cache_pkts -= pkts;
579
580         return pkts;
581 }
582
583 #if defined(RTE_ARCH_ARM64)
584
585 static __rte_always_inline void
586 cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
587                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
588                      uint64_t flags)
589 {
590         uint16_t lso_sb;
591         uint64_t mask;
592
593         if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
594                 return;
595
596         mask = -(!w1->il3type);
597         lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
598
599         w0->u |= BIT(14);
600         w0->lso_sb = lso_sb;
601         w0->lso_mps = m->tso_segsz;
602         w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
603         w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
604
605         /* Handle tunnel tso */
606         if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
607             (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
608                 const uint8_t is_udp_tun =
609                         (CNXK_NIX_UDP_TUN_BITMASK >>
610                          ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
611                         0x1;
612
613                 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
614                 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
615                 /* Update format for UDP tunneled packet */
616                 w0->lso_format += is_udp_tun ? 2 : 6;
617
618                 w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
619         }
620 }
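/* Unlike the scalar path, which reads format indices from
 * txq->lso_tun_fmt, the fixed +2/+6 (plus outer-IPv6) offsets here
 * assume the LSO tunnel formats were programmed in that order at setup.
 */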
621
622 static __rte_always_inline uint8_t
623 cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
624                                union nix_send_hdr_w0_u *sh,
625                                union nix_send_sg_s *sg, const uint32_t flags)
626 {
627         struct rte_mbuf *m_next;
628         uint64_t *slist, sg_u;
629         uint16_t nb_segs;
630         uint64_t segdw;
631         int i = 1;
632
633         sh->total = m->pkt_len;
634         /* Clear sg->u header before use */
635         sg->u &= 0xFC00000000000000;
636         sg_u = sg->u;
637         slist = &cmd[0];
638
639         sg_u = sg_u | ((uint64_t)m->data_len);
640
641         nb_segs = m->nb_segs - 1;
642         m_next = m->next;
643
644         /* Set invert df if buffer is not to be freed by H/W */
645         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
646                 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
647         /* Mark mempool object as "put" since it is freed by NIX */
648 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
649         if (!(sg_u & (1ULL << 55)))
650                 RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
651         rte_io_wmb();
652 #endif
653
654         m = m_next;
655         /* Fill mbuf segments */
656         do {
657                 m_next = m->next;
658                 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
659                 *slist = rte_mbuf_data_iova(m);
660                 /* Set invert df if buffer is not to be freed by H/W */
661                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
662                         sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
664                 /* Mark mempool object as "put" since it is freed by NIX
665                  */
665 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
666                 if (!(sg_u & (1ULL << (i + 55))))
667                         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
668                 rte_io_wmb();
669 #endif
670                 slist++;
671                 i++;
672                 nb_segs--;
673                 if (i > 2 && nb_segs) {
674                         i = 0;
675                         /* Next SG subdesc */
676                         *(uint64_t *)slist = sg_u & 0xFC00000000000000;
677                         sg->u = sg_u;
678                         sg->segs = 3;
679                         sg = (union nix_send_sg_s *)slist;
680                         sg_u = sg->u;
681                         slist++;
682                 }
683                 m = m_next;
684         } while (nb_segs);
685
686         sg->u = sg_u;
687         sg->segs = i;
688         segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
689
690         segdw += 2;
691         /* Roundup extra dwords to multiple of 2 */
692         segdw = (segdw >> 1) + (segdw & 0x1);
693         /* Default dwords */
694         segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
695                  !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
696         sh->sizem1 = segdw - 1;
697
698         return segdw;
699 }
700
701 static __rte_always_inline uint8_t
702 cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
703                           uint64x2_t *cmd1, const uint32_t flags)
704 {
705         union nix_send_hdr_w0_u sh;
706         union nix_send_sg_s sg;
707         uint8_t ret;
708
709         if (m->nb_segs == 1) {
710                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
711                         sg.u = vgetq_lane_u64(cmd1[0], 0);
712                         sg.u |= (cnxk_nix_prefree_seg(m) << 55);
713                         cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
714                 }
715
716 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
717                 sg.u = vgetq_lane_u64(cmd1[0], 0);
718                 if (!(sg.u & (1ULL << 55)))
719                         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
720                 rte_io_wmb();
721 #endif
722                 return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
723                        !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
724         }
725
726         sh.u = vgetq_lane_u64(cmd0[0], 0);
727         sg.u = vgetq_lane_u64(cmd1[0], 0);
728
729         ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
730
731         cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
732         cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
733         return ret;
734 }
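/* The value returned is the descriptor size in 16-byte units: the
 * single-segment case is 2 (HDR + SG) plus optional EXT and MEM units,
 * while the list case also counts the extra IOVA dwords, rounded up.
 */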
735
736 #define NIX_DESCS_PER_LOOP 4
737
738 static __rte_always_inline void
739 cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
740                                uint64x2_t *cmd2, uint64x2_t *cmd3,
741                                uint8_t *segdw,
742                                uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
743                                uint64_t *lmt_addr, rte_iova_t io_addr,
744                                const uint32_t flags)
745 {
746         uint64_t lmt_status;
747         uint8_t j, off;
748
749         if (!(flags & NIX_TX_NEED_EXT_HDR) &&
750             !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
751                 /* No extra segments in the 4 consecutive packets. */
752                 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
753                         do {
754                                 vst1q_u64(lmt_addr, cmd0[0]);
755                                 vst1q_u64(lmt_addr + 2, cmd1[0]);
756                                 vst1q_u64(lmt_addr + 4, cmd0[1]);
757                                 vst1q_u64(lmt_addr + 6, cmd1[1]);
758                                 vst1q_u64(lmt_addr + 8, cmd0[2]);
759                                 vst1q_u64(lmt_addr + 10, cmd1[2]);
760                                 vst1q_u64(lmt_addr + 12, cmd0[3]);
761                                 vst1q_u64(lmt_addr + 14, cmd1[3]);
762                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
763                         } while (lmt_status == 0);
764
765                         return;
766                 }
767         }
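        /* A cn9k LMTLINE holds 128 bytes (8 16-byte units), hence the
         * <= 8 checks above and in the pairing loop below.
         */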
768
769         for (j = 0; j < NIX_DESCS_PER_LOOP;) {
770                 /* Fit consecutive packets in same LMTLINE. */
771                 if ((segdw[j] + segdw[j + 1]) <= 8) {
772 again0:
773                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
774                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
775                                 vst1q_u64(lmt_addr, cmd0[j]);
776                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
777                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
778                                 /* Copy segs */
779                                 off = segdw[j] - 4;
780                                 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
781                                 off <<= 1;
782                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
783
784                                 vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
785                                 vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
786                                 vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
787                                 roc_lmt_mov_seg(lmt_addr + 14 + off,
788                                                 slist[j + 1], segdw[j + 1] - 4);
789                                 off += ((segdw[j + 1] - 4) << 1);
790                                 vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
791                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
792                                 vst1q_u64(lmt_addr, cmd0[j]);
793                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
794                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
795                                 /* Copy segs */
796                                 off = segdw[j] - 3;
797                                 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
798                                 off <<= 1;
799                                 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
800                                 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
801                                 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
802                                 roc_lmt_mov_seg(lmt_addr + 12 + off,
803                                                 slist[j + 1], segdw[j + 1] - 3);
804                         } else {
805                                 vst1q_u64(lmt_addr, cmd0[j]);
806                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
807                                 /* Copy segs */
808                                 off = segdw[j] - 2;
809                                 roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
810                                 off <<= 1;
811                                 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
812                                 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
813                                 roc_lmt_mov_seg(lmt_addr + 8 + off,
814                                                 slist[j + 1], segdw[j + 1] - 2);
815                         }
816                         lmt_status = roc_lmt_submit_ldeor(io_addr);
817                         if (lmt_status == 0)
818                                 goto again0;
819                         j += 2;
820                 } else {
821 again1:
822                         if ((flags & NIX_TX_NEED_EXT_HDR) &&
823                             (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
824                                 vst1q_u64(lmt_addr, cmd0[j]);
825                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
826                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
827                                 /* Copy segs */
828                                 off = segdw[j] - 4;
829                                 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
830                                 off <<= 1;
831                                 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
832                         } else if (flags & NIX_TX_NEED_EXT_HDR) {
833                                 vst1q_u64(lmt_addr, cmd0[j]);
834                                 vst1q_u64(lmt_addr + 2, cmd2[j]);
835                                 vst1q_u64(lmt_addr + 4, cmd1[j]);
836                                 /* Copy segs */
837                                 off = segdw[j] - 3;
838                                 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
839                         } else {
840                                 vst1q_u64(lmt_addr, cmd0[j]);
841                                 vst1q_u64(lmt_addr + 2, cmd1[j]);
842                                 /* Copy segs */
843                                 off = segdw[j] - 2;
844                                 roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
845                         }
846                         lmt_status = roc_lmt_submit_ldeor(io_addr);
847                         if (lmt_status == 0)
848                                 goto again1;
849                         j += 1;
850                 }
851         }
852 }
853
854 static __rte_always_inline uint16_t
855 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
856                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
857 {
858         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
859         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
860         uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
861                 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
862         uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
863         uint64x2_t senddesc01_w0, senddesc23_w0;
864         uint64x2_t senddesc01_w1, senddesc23_w1;
865         uint64x2_t sendext01_w0, sendext23_w0;
866         uint64x2_t sendext01_w1, sendext23_w1;
867         uint64x2_t sendmem01_w0, sendmem23_w0;
868         uint64x2_t sendmem01_w1, sendmem23_w1;
869         uint64x2_t sgdesc01_w0, sgdesc23_w0;
870         uint64x2_t sgdesc01_w1, sgdesc23_w1;
871         struct cn9k_eth_txq *txq = tx_queue;
872         uint64_t *lmt_addr = txq->lmt_addr;
873         rte_iova_t io_addr = txq->io_addr;
874         uint64x2_t ltypes01, ltypes23;
875         uint64x2_t xtmp128, ytmp128;
876         uint64x2_t xmask01, xmask23;
877         uint64_t lmt_status, i;
878         uint16_t pkts_left;
879
880         NIX_XMIT_FC_OR_RETURN(txq, pkts);
881
882         pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
883         pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
884
885         /* Reduce the cached count */
886         txq->fc_cache_pkts -= pkts;
887
888         /* Perform header writes before barrier for TSO */
889         if (flags & NIX_TX_OFFLOAD_TSO_F) {
890                 for (i = 0; i < pkts; i++)
891                         cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
892         }
893
894         /* Let's commit any changes in the packets here, as no further
895          * changes will be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F is set.
896          */
897         if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
898                 rte_io_wmb();
899
900         senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
901         senddesc23_w0 = senddesc01_w0;
902
903         senddesc01_w1 = vdupq_n_u64(0);
904         senddesc23_w1 = senddesc01_w1;
905         sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
906         sgdesc23_w0 = sgdesc01_w0;
907
908         if (flags & NIX_TX_NEED_EXT_HDR) {
909                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
910                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
911                                                    BIT_ULL(15));
912                         sendmem01_w0 =
913                                 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
914                                             (NIX_SENDMEMALG_SETTSTMP << 56));
915                         sendmem23_w0 = sendmem01_w0;
916                         sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
917                         sendmem23_w1 = sendmem01_w1;
918                 } else {
919                         sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
920                 }
921                 sendext23_w0 = sendext01_w0;
922
923                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
924                         sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
925                 else
926                         sendext01_w1 = vdupq_n_u64(0);
927                 sendext23_w1 = sendext01_w1;
928         }
929
930         for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
931                 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
932                 senddesc01_w0 =
933                         vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
934                 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
935
936                 senddesc23_w0 = senddesc01_w0;
937                 sgdesc23_w0 = sgdesc01_w0;
938
939                 /* Clear vlan enables. */
940                 if (flags & NIX_TX_NEED_EXT_HDR) {
941                         sendext01_w1 = vbicq_u64(sendext01_w1,
942                                                  vdupq_n_u64(0x3FFFF00FFFF00));
943                         sendext23_w1 = sendext01_w1;
944                 }
945
946                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
947                         /* Reset send mem alg to SETTSTMP from SUB */
948                         sendmem01_w0 = vbicq_u64(sendmem01_w0,
949                                                  vdupq_n_u64(BIT_ULL(59)));
950                         /* Reset send mem address to default. */
951                         sendmem01_w1 =
952                                 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
953                         sendmem23_w0 = sendmem01_w0;
954                         sendmem23_w1 = sendmem01_w1;
955                 }
956
957                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
958                         /* Clear the LSO enable bit. */
959                         sendext01_w0 = vbicq_u64(sendext01_w0,
960                                                  vdupq_n_u64(BIT_ULL(14)));
961                         sendext23_w0 = sendext01_w0;
962                 }
963
964                 /* Move mbufs to iova */
965                 mbuf0 = (uint64_t *)tx_pkts[0];
966                 mbuf1 = (uint64_t *)tx_pkts[1];
967                 mbuf2 = (uint64_t *)tx_pkts[2];
968                 mbuf3 = (uint64_t *)tx_pkts[3];
969
970                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
971                                      offsetof(struct rte_mbuf, buf_iova));
972                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
973                                      offsetof(struct rte_mbuf, buf_iova));
974                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
975                                      offsetof(struct rte_mbuf, buf_iova));
976                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
977                                      offsetof(struct rte_mbuf, buf_iova));
978                 /*
979                  * Get each mbuf's ol_flags, iova, pktlen, dataoff:
980                  * dataoff_iovaX.D[0] = iova,
981                  * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
982                  * len_olflagsX.D[0] = ol_flags,
983                  * len_olflagsX.D[1](63:32) = mbuf->pkt_len
984                  */
985                 dataoff_iova0 = vld1q_u64(mbuf0);
986                 len_olflags0 = vld1q_u64(mbuf0 + 2);
987                 dataoff_iova1 = vld1q_u64(mbuf1);
988                 len_olflags1 = vld1q_u64(mbuf1 + 2);
989                 dataoff_iova2 = vld1q_u64(mbuf2);
990                 len_olflags2 = vld1q_u64(mbuf2 + 2);
991                 dataoff_iova3 = vld1q_u64(mbuf3);
992                 len_olflags3 = vld1q_u64(mbuf3 + 2);
993
994                 /* Move mbufs to point to pool */
995                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
996                                      offsetof(struct rte_mbuf, pool) -
997                                      offsetof(struct rte_mbuf, buf_iova));
998                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
999                                      offsetof(struct rte_mbuf, pool) -
1000                                      offsetof(struct rte_mbuf, buf_iova));
1001                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1002                                      offsetof(struct rte_mbuf, pool) -
1003                                      offsetof(struct rte_mbuf, buf_iova));
1004                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1005                                      offsetof(struct rte_mbuf, pool) -
1006                                      offsetof(struct rte_mbuf, buf_iova));
1007
1008                 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1009                              NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1010                         /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1011                         /*
1012                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1013                          * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1014                          */
1015
1016                         asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1017                                      : [a] "+w"(senddesc01_w1)
1018                                      : [in] "r"(mbuf0 + 2)
1019                                      : "memory");
1020
1021                         asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1022                                      : [a] "+w"(senddesc01_w1)
1023                                      : [in] "r"(mbuf1 + 2)
1024                                      : "memory");
1025
1026                         asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1027                                      : [b] "+w"(senddesc23_w1)
1028                                      : [in] "r"(mbuf2 + 2)
1029                                      : "memory");
1030
1031                         asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1032                                      : [b] "+w"(senddesc23_w1)
1033                                      : [in] "r"(mbuf3 + 2)
1034                                      : "memory");
1035
1036                         /* Get pool pointer alone */
1037                         mbuf0 = (uint64_t *)*mbuf0;
1038                         mbuf1 = (uint64_t *)*mbuf1;
1039                         mbuf2 = (uint64_t *)*mbuf2;
1040                         mbuf3 = (uint64_t *)*mbuf3;
1041                 } else {
1042                         /* Get pool pointer alone */
1043                         mbuf0 = (uint64_t *)*mbuf0;
1044                         mbuf1 = (uint64_t *)*mbuf1;
1045                         mbuf2 = (uint64_t *)*mbuf2;
1046                         mbuf3 = (uint64_t *)*mbuf3;
1047                 }
1048
1049                 const uint8x16_t shuf_mask2 = {
1050                         0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1051                         0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1052                 };
1053                 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1054                 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1055
1056                 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1057                 const uint64x2_t and_mask0 = {
1058                         0xFFFFFFFFFFFFFFFF,
1059                         0x000000000000FFFF,
1060                 };
1061
1062                 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1063                 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1064                 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1065                 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1066
1067                 /*
1068                  * Pick only 16 bits of pktlen present at bits 63:32
1069                  * and place them at bits 15:0.
1070                  */
1071                 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1072                 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1073
1074                 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1075                 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1076                 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1077
1078                 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1079                  * pktlen at 15:0 position.
1080                  */
1081                 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1082                 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1083                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1084                 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1085
1086                 /* Move mbuf to point to pool_id. */
1087                 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1088                                      offsetof(struct rte_mempool, pool_id));
1089                 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1090                                      offsetof(struct rte_mempool, pool_id));
1091                 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1092                                      offsetof(struct rte_mempool, pool_id));
1093                 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1094                                      offsetof(struct rte_mempool, pool_id));
1095
1096                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1097                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1098                         /*
1099                          * Lookup table to translate ol_flags to
1100                          * il3/il4 types. But we still use ol3/ol4 types in
1101                          * senddesc_w1 as only one header processing is enabled.
1102                          */
1103                         const uint8x16_t tbl = {
1104                                 /* [0-15] = il4type:il3type */
1105                                 0x04, /* none (IPv6 assumed) */
1106                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1107                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1108                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1109                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1110                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1111                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1112                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1113                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1114                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1115                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1116                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1117                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1118                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1119                                        * RTE_MBUF_F_TX_TCP_CKSUM
1120                                        */
1121                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1122                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1123                                        */
1124                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1125                                        * RTE_MBUF_F_TX_UDP_CKSUM
1126                                        */
1127                         };
1128
1129                         /* Extract olflags to translate to iltypes */
1130                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1131                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1132
1133                         /*
1134                          * E(47):L3_LEN(9):L2_LEN(7+z)
1135                          * E(47):L3_LEN(9):L2_LEN(7+z)
1136                          */
1137                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1138                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1139
1140                         /* Move OLFLAGS bits 55:52 to 51:48
1141                          * with zeros prepended on the byte and the rest
1142                          * don't care
1143                          */
1144                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1145                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1146                         /*
1147                          * E(48):L3_LEN(8):L2_LEN(z+7)
1148                          * E(48):L3_LEN(8):L2_LEN(z+7)
1149                          */
1150                         const int8x16_t tshft3 = {
1151                                 -1, 0, 8, 8, 8, 8, 8, 8,
1152                                 -1, 0, 8, 8, 8, 8, 8, 8,
1153                         };
1154
1155                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1156                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1157
1158                         /* Do the lookup */
1159                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1160                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1161
1162                         /* Pick only relevant fields i.e. bits 48:55 of iltype
1163                          * and place it in ol3/ol4type of senddesc_w1
1164                          */
1165                         const uint8x16_t shuf_mask0 = {
1166                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1167                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1168                         };
1169
1170                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1171                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1172
1173                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1174                          * a [E(32):E(16):OL3(8):OL2(8)]
1175                          * a = a + (a << 8)
1176                          * a [E(32):E(16):(OL3+OL2):OL2]
1177                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1178                          */
1179                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1180                                                  vshlq_n_u16(senddesc01_w1, 8));
1181                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1182                                                  vshlq_n_u16(senddesc23_w1, 8));
1183
1184                         /* Move ltypes to senddesc*_w1 */
1185                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1186                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1187                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1188                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1189                         /*
1190                          * Lookup table to translate ol_flags to
1191                          * ol3/ol4 types.
1192                          */
1193
1194                         const uint8x16_t tbl = {
1195                                 /* [0-15] = ol4type:ol3type */
1196                                 0x00, /* none */
1197                                 0x03, /* OUTER_IP_CKSUM */
1198                                 0x02, /* OUTER_IPV4 */
1199                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1200                                 0x04, /* OUTER_IPV6 */
1201                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1202                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1203                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1204                                        * OUTER_IP_CKSUM
1205                                        */
1206                                 0x00, /* OUTER_UDP_CKSUM */
1207                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1208                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1209                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1210                                        * OUTER_IP_CKSUM
1211                                        */
1212                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1213                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1214                                        * OUTER_IP_CKSUM
1215                                        */
1216                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1217                                        * OUTER_IPV4
1218                                        */
1219                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1220                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1221                                        */
1222                         };
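                             /* The masking and shifts below form a 4-bit
                              * index from the OUTER_* flag bits in the order
                              * this table is laid out; e.g. OUTER_IPV4 |
                              * OUTER_IP_CKSUM forms index 3 and looks up
                              * oltype 0x03.
                              */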
1223
1224                         /* Extract olflags to translate to iltypes */
1225                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1226                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1227
1228                         /*
1229                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1230                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1231                          */
1232                         const uint8x16_t shuf_mask5 = {
1233                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1234                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1235                         };
1236                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1237                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1238
1239                         /* Extract outer ol flags only */
1240                         const uint64x2_t o_cksum_mask = {
1241                                 0x1C00020000000000,
1242                                 0x1C00020000000000,
1243                         };
1244
1245                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1246                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1247
1248                         /* Extract OUTER_UDP_CKSUM bit 41 and
1249                          * move it to bit 61
1250                          */
1251
1252                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1253                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1254
1255                         /* Shift oltype by 2 to start nibble from BIT(56)
1256                          * instead of BIT(58)
1257                          */
1258                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1259                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1260                         /*
1261                          * E(48):L3_LEN(8):L2_LEN(z+7)
1262                          * E(48):L3_LEN(8):L2_LEN(z+7)
1263                          */
1264                         const int8x16_t tshft3 = {
1265                                 -1, 0, 8, 8, 8, 8, 8, 8,
1266                                 -1, 0, 8, 8, 8, 8, 8, 8,
1267                         };
1268
1269                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1270                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1271
1272                         /* Do the lookup */
1273                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1274                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1275
1276                         /* Pick only relevant fields, i.e. Bits 56:63 of oltype,
1277                          * and place them in ol3/ol4type of senddesc_w1
1278                          */
1279                         const uint8x16_t shuf_mask0 = {
1280                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1281                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1282                         };
1283
1284                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1285                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1286
1287                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1288                          * a [E(32):E(16):OL3(8):OL2(8)]
1289                          * a = a + (a << 8)
1290                          * a [E(32):E(16):(OL3+OL2):OL2]
1291                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1292                          */
1293                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1294                                                  vshlq_n_u16(senddesc01_w1, 8));
1295                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1296                                                  vshlq_n_u16(senddesc23_w1, 8));
1297
1298                         /* Move ltypes to senddesc*_w1 */
1299                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1300                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1301                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1302                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1303                         /* Lookup table to translate ol_flags to
1304                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1305                          */
1306                         const uint8x16x2_t tbl = {{
1307                                 {
1308                                         /* [0-15] = il4type:il3type */
1309                                         0x04, /* none (IPv6) */
1310                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1311                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1312                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1313                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1314                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1315                                                * RTE_MBUF_F_TX_TCP_CKSUM
1316                                                */
1317                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
1318                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1319                                                */
1320                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
1321                                                * RTE_MBUF_F_TX_UDP_CKSUM
1322                                                */
1323                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
1324                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
1325                                                * RTE_MBUF_F_TX_TCP_CKSUM
1326                                                */
1327                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
1328                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1329                                                */
1330                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
1331                                                * RTE_MBUF_F_TX_UDP_CKSUM
1332                                                */
1333                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
1334                                                * RTE_MBUF_F_TX_IP_CKSUM
1335                                                */
1336                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1337                                                * RTE_MBUF_F_TX_TCP_CKSUM
1338                                                */
1339                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1340                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1341                                                */
1342                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1343                                                * RTE_MBUF_F_TX_UDP_CKSUM
1344                                                */
1345                                 },
1346
1347                                 {
1348                                         /* [16-31] = ol4type:ol3type */
1349                                         0x00, /* none */
1350                                         0x03, /* OUTER_IP_CKSUM */
1351                                         0x02, /* OUTER_IPV4 */
1352                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1353                                         0x04, /* OUTER_IPV6 */
1354                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1355                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1356                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1357                                                * OUTER_IP_CKSUM
1358                                                */
1359                                         0x00, /* OUTER_UDP_CKSUM */
1360                                         0x33, /* OUTER_UDP_CKSUM |
1361                                                * OUTER_IP_CKSUM
1362                                                */
1363                                         0x32, /* OUTER_UDP_CKSUM |
1364                                                * OUTER_IPV4
1365                                                */
1366                                         0x33, /* OUTER_UDP_CKSUM |
1367                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1368                                                */
1369                                         0x34, /* OUTER_UDP_CKSUM |
1370                                                * OUTER_IPV6
1371                                                */
1372                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1373                                                * OUTER_IP_CKSUM
1374                                                */
1375                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1376                                                * OUTER_IPV4
1377                                                */
1378                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1379                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1380                                                */
1381                                 },
1382                         }};
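                             /* vqtbl2q_u8() treats this pair as one 32-entry
                              * table: indices 0-15 resolve il4type:il3type,
                              * while indices 16-31, reached by setting Bit(4)
                              * of oltype below, resolve ol4type:ol3type.
                              */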
1383
1384                         /* Extract olflags to translate to oltype & iltype */
1385                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1386                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1387
1388                         /*
1389                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1390                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1391                          */
1392                         const uint32x4_t tshft_4 = {
1393                                 1,
1394                                 0,
1395                                 1,
1396                                 0,
1397                         };
1398                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1399                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1400
1401                         /*
1402                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1403                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1404                          */
1405                         const uint8x16_t shuf_mask5 = {
1406                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1407                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1408                         };
1409                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1410                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1411
1412                         /* Extract outer and inner header ol_flags */
1413                         const uint64x2_t oi_cksum_mask = {
1414                                 0x1CF0020000000000,
1415                                 0x1CF0020000000000,
1416                         };
1417
1418                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1419                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1420
1421                         /* Extract OUTER_UDP_CKSUM bit 41 and
1422                          * move it to bit 61
1423                          */
1424
1425                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1426                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1427
1428                         /* Shift right oltype by 2 and iltype by 4
1429                          * to start oltype nibble from BIT(56)
1430                          * instead of BIT(58) and iltype nibble from BIT(48)
1431                          * instead of BIT(52).
1432                          */
1433                         const int8x16_t tshft5 = {
1434                                 8, 8, 8, 8, 8, 8, -4, -2,
1435                                 8, 8, 8, 8, 8, 8, -4, -2,
1436                         };
1437
1438                         xtmp128 = vshlq_u8(xtmp128, tshft5);
1439                         ytmp128 = vshlq_u8(ytmp128, tshft5);
1440                         /*
1441                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1442                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1443                          */
1444                         const int8x16_t tshft3 = {
1445                                 -1, 0, -1, 0, 0, 0, 0, 0,
1446                                 -1, 0, -1, 0, 0, 0, 0, 0,
1447                         };
1448
1449                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1450                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1451
1452                         /* Mark Bit(4) of oltype to select the outer half of tbl */
1453                         const uint64x2_t oi_cksum_mask2 = {
1454                                 0x1000000000000000,
1455                                 0x1000000000000000,
1456                         };
1457
1458                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1459                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1460
1461                         /* Do the lookup */
1462                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1463                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1464
1465                         /* Pick only relevant fields, i.e. Bits 48:55 of iltype and
1466                          * Bits 56:63 of oltype, and place them in the corresponding
1467                          * place in senddesc_w1.
1468                          */
1469                         const uint8x16_t shuf_mask0 = {
1470                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1471                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1472                         };
1473
1474                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1475                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1476
1477                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1478                          * l3len, l2len, ol3len, ol2len.
1479                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1480                          * a = a + (a << 8)
1481                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1482                          * a = a + (a << 16)
1483                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1484                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1485                          */
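                             /* Worked example with hypothetical lengths:
                              * ol2 = 14, ol3 = 20, l2 = 8 and l3 = 20 give
                              * a = [20:8:20:14]; after both adds the low four
                              * bytes become [62:42:34:14], i.e. OL3PTR = 14,
                              * OL4PTR = 34, IL3PTR = 42 and IL4PTR = 62.
                              */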
1486                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1487                                                  vshlq_n_u32(senddesc01_w1, 8));
1488                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1489                                                  vshlq_n_u32(senddesc23_w1, 8));
1490
1491                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1492                         senddesc01_w1 = vaddq_u8(
1493                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1494                         senddesc23_w1 = vaddq_u8(
1495                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
1496
1497                         /* Move ltypes to senddesc*_w1 */
1498                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1499                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1500                 }
1501
1502                 xmask01 = vdupq_n_u64(0);
1503                 xmask23 = xmask01;
1504                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1505                              : [a] "+w"(xmask01)
1506                              : [in] "r"(mbuf0)
1507                              : "memory");
1508
1509                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1510                              : [a] "+w"(xmask01)
1511                              : [in] "r"(mbuf1)
1512                              : "memory");
1513
1514                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1515                              : [b] "+w"(xmask23)
1516                              : [in] "r"(mbuf2)
1517                              : "memory");
1518
1519                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1520                              : [b] "+w"(xmask23)
1521                              : [in] "r"(mbuf3)
1522                              : "memory");
1523                 xmask01 = vshlq_n_u64(xmask01, 20);
1524                 xmask23 = vshlq_n_u64(xmask23, 20);
1525
1526                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1527                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
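                     /* The LD1s above gather a 16-bit per-mbuf field into H
                      * lanes 0 and 4; shifted left by 20 it lands at bits
                      * 20:35 of SEND_HDR_W0, consistent with patching in the
                      * mempool aura id (an assumption; the source pointers
                      * are prepared earlier in this function).
                      */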
1528
1529                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1530                         /* Tx ol_flag for VLAN. */
1531                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
1532                         /* Bit enable for VLAN1 */
1533                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1534                         /* Tx ol_flag for QinQ. */
1535                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
1536                         /* Bit enable for VLAN0 */
1537                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1538                         /* Load VLAN values from packets; outer is VLAN0. */
1539                         uint64x2_t ext01 = {
1540                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1541                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1542                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1543                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1544                         };
1545                         uint64x2_t ext23 = {
1546                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1547                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1548                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1549                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1550                         };
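                             /* Per the shifts above, the outer TCI lands at
                              * bits 8:23 (VLAN0 insert fields) and the inner
                              * TCI at bits 32:47 (VLAN1 insert fields) of
                              * SEND_EXT_W1.
                              */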
1551
1552                         /* Get ol_flags of the packets. */
1553                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1554                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1555
1556                         /* ORR vlan outer/inner values into cmd. */
1557                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1558                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1559
1560                         /* Test for offload enable bits and generate masks. */
1561                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1562                                                       mlv),
1563                                             vandq_u64(vtstq_u64(xtmp128, olq),
1564                                                       mlq));
1565                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1566                                                       mlv),
1567                                             vandq_u64(vtstq_u64(ytmp128, olq),
1568                                                       mlq));
1569
1570                         /* Set vlan enable bits into cmd based on mask. */
1571                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1572                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1573                 }
1574
1575                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1576                         /* Tx ol_flag for timestamp. */
1577                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
1578                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
1579                         /* Set send mem alg to SUB. */
1580                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1581                         /* Increment send mem address by 8. */
1582                         const uint64x2_t addr = {0x8, 0x8};
1583
1584                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1585                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1586
1587                         /* Check if a timestamp is requested and generate an
1588                          * inverted mask, since no change to the default cmd
1589                          * value is needed when it is.
1590                          */
1591                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1592                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1593
1594                         /* Change send mem address to an 8-byte offset when
1595                          * TSTMP is disabled.
1596                          */
1597                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
1598                                                  vandq_u64(xtmp128, addr));
1599                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
1600                                                  vandq_u64(ytmp128, addr));
1601                         /* Change send mem alg to SUB when TSTMP is disabled. */
1602                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
1603                                                  vandq_u64(xtmp128, alg));
1604                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
1605                                                  vandq_u64(ytmp128, alg));
1606
1607                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1608                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1609                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1610                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1611                 }
1612
1613                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1614                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1615                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1616
1617                         /* Extract SD W1 as we need to set L4 types. */
1618                         vst1q_u64(sd_w1, senddesc01_w1);
1619                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
1620
1621                         /* Extract SX W0 as we need to set LSO fields. */
1622                         vst1q_u64(sx_w0, sendext01_w0);
1623                         vst1q_u64(sx_w0 + 2, sendext23_w0);
1624
1625                         /* Extract ol_flags. */
1626                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1627                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1628
1629                         /* Prepare individual mbufs. */
1630                         cn9k_nix_prepare_tso(tx_pkts[0],
1631                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
1632                                 (union nix_send_ext_w0_u *)&sx_w0[0],
1633                                 vgetq_lane_u64(xtmp128, 0), flags);
1634
1635                         cn9k_nix_prepare_tso(tx_pkts[1],
1636                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
1637                                 (union nix_send_ext_w0_u *)&sx_w0[1],
1638                                 vgetq_lane_u64(xtmp128, 1), flags);
1639
1640                         cn9k_nix_prepare_tso(tx_pkts[2],
1641                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
1642                                 (union nix_send_ext_w0_u *)&sx_w0[2],
1643                                 vgetq_lane_u64(ytmp128, 0), flags);
1644
1645                         cn9k_nix_prepare_tso(tx_pkts[3],
1646                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
1647                                 (union nix_send_ext_w0_u *)&sx_w0[3],
1648                                 vgetq_lane_u64(ytmp128, 1), flags);
1649
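                             /* Reload the scalar arrays so the vector
                              * registers pick up the per-packet L4 types and
                              * LSO fields set by cn9k_nix_prepare_tso().
                              */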
1650                         senddesc01_w1 = vld1q_u64(sd_w1);
1651                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1652
1653                         sendext01_w0 = vld1q_u64(sx_w0);
1654                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
1655                 }
1656
1657                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1658                     !(flags & NIX_TX_MULTI_SEG_F)) {
1659                         /* Set don't free bit if reference count > 1 */
1660                         xmask01 = vdupq_n_u64(0);
1661                         xmask23 = xmask01;
1662
1663                         /* Fetch mbuf pointers for prefree handling */
1664                         mbuf0 = (uint64_t *)tx_pkts[0];
1665                         mbuf1 = (uint64_t *)tx_pkts[1];
1666                         mbuf2 = (uint64_t *)tx_pkts[2];
1667                         mbuf3 = (uint64_t *)tx_pkts[3];
1668
1669                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1670                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
1671                         else
1672                                 RTE_MEMPOOL_CHECK_COOKIES(
1673                                         ((struct rte_mbuf *)mbuf0)->pool,
1674                                         (void **)&mbuf0, 1, 0);
1675
1676                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1677                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
1678                         else
1679                                 RTE_MEMPOOL_CHECK_COOKIES(
1680                                         ((struct rte_mbuf *)mbuf1)->pool,
1681                                         (void **)&mbuf1, 1, 0);
1682
1683                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1684                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
1685                         else
1686                                 RTE_MEMPOOL_CHECK_COOKIES(
1687                                         ((struct rte_mbuf *)mbuf2)->pool,
1688                                         (void **)&mbuf2, 1, 0);
1689
1690                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1691                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
1692                         else
1693                                 RTE_MEMPOOL_CHECK_COOKIES(
1694                                         ((struct rte_mbuf *)mbuf3)->pool,
1695                                         (void **)&mbuf3, 1, 0);
1696                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1697                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1698                         /* Ensure mbuf fields updated in
1699                          * cnxk_nix_prefree_seg are written before LMTST.
1700                          */
1701                         rte_io_wmb();
1702                 } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
1703                         /* Fetch mbuf pointers for mempool debug marking */
1704                         mbuf0 = (uint64_t *)tx_pkts[0];
1705                         mbuf1 = (uint64_t *)tx_pkts[1];
1706                         mbuf2 = (uint64_t *)tx_pkts[2];
1707                         mbuf3 = (uint64_t *)tx_pkts[3];
1708
1709                         /* Mark mempool object as "put" since
1710                          * it is freed by NIX
1711                          */
1712                         RTE_MEMPOOL_CHECK_COOKIES(
1713                                 ((struct rte_mbuf *)mbuf0)->pool,
1714                                 (void **)&mbuf0, 1, 0);
1715
1716                         RTE_MEMPOOL_CHECK_COOKIES(
1717                                 ((struct rte_mbuf *)mbuf1)->pool,
1718                                 (void **)&mbuf1, 1, 0);
1719
1720                         RTE_MEMPOOL_CHECK_COOKIES(
1721                                 ((struct rte_mbuf *)mbuf2)->pool,
1722                                 (void **)&mbuf2, 1, 0);
1723
1724                         RTE_MEMPOOL_CHECK_COOKIES(
1725                                 ((struct rte_mbuf *)mbuf3)->pool,
1726                                 (void **)&mbuf3, 1, 0);
1727 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1728                         rte_io_wmb();
1729 #endif
1730                 }
1731
1732                 /* Create a 4-dword cmd per mbuf (SEND_HDR + SG desc) */
1733                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1734                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1735                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1736                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1737
1738                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1739                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1740                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1741                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
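                     /* vzip1q/vzip2q re-interleave the paired W0/W1 vectors
                      * into per-packet dword pairs: cmd0[i] is packet i's
                      * SEND_HDR and cmd1[i] its SG descriptor.
                      */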
1742
1743                 if (flags & NIX_TX_NEED_EXT_HDR) {
1744                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1745                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1746                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1747                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1748                 }
1749
1750                 if (flags & NIX_TX_MULTI_SEG_F) {
1751                         uint64_t seg_list[NIX_DESCS_PER_LOOP]
1752                                          [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
1753                         uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
1754
1755                         /* Build mseg list for each packet individually. */
1756                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1757                                 segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
1758                                                         seg_list[j], &cmd0[j],
1759                                                         &cmd1[j], flags);
1760                         segdw[4] = 8;
1761
1762                         /* Commit all changes to mbuf before LMTST. */
1763                         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1764                                 rte_io_wmb();
1765
1766                         cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
1767                                                        segdw, seg_list,
1768                                                        lmt_addr, io_addr,
1769                                                        flags);
1770                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1771                         /* With the ext header in the command we can no longer
1772                          * send all 4 packets together since the LMTLINE is
1773                          * 128 bytes. Split and Tx twice.
1774                          */
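                             /* roc_lmt_submit_ldeor() returns 0 when the
                              * LMTST is not accepted by hardware, so each
                              * half is retried until it completes.
                              */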
1775                         do {
1776                                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1777                                         vst1q_u64(lmt_addr, cmd0[0]);
1778                                         vst1q_u64(lmt_addr + 2, cmd2[0]);
1779                                         vst1q_u64(lmt_addr + 4, cmd1[0]);
1780                                         vst1q_u64(lmt_addr + 6, cmd3[0]);
1781                                         vst1q_u64(lmt_addr + 8, cmd0[1]);
1782                                         vst1q_u64(lmt_addr + 10, cmd2[1]);
1783                                         vst1q_u64(lmt_addr + 12, cmd1[1]);
1784                                         vst1q_u64(lmt_addr + 14, cmd3[1]);
1785                                 } else {
1786                                         vst1q_u64(lmt_addr, cmd0[0]);
1787                                         vst1q_u64(lmt_addr + 2, cmd2[0]);
1788                                         vst1q_u64(lmt_addr + 4, cmd1[0]);
1789                                         vst1q_u64(lmt_addr + 6, cmd0[1]);
1790                                         vst1q_u64(lmt_addr + 8, cmd2[1]);
1791                                         vst1q_u64(lmt_addr + 10, cmd1[1]);
1792                                 }
1793                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1794                         } while (lmt_status == 0);
1795
1796                         do {
1797                                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1798                                         vst1q_u64(lmt_addr, cmd0[2]);
1799                                         vst1q_u64(lmt_addr + 2, cmd2[2]);
1800                                         vst1q_u64(lmt_addr + 4, cmd1[2]);
1801                                         vst1q_u64(lmt_addr + 6, cmd3[2]);
1802                                         vst1q_u64(lmt_addr + 8, cmd0[3]);
1803                                         vst1q_u64(lmt_addr + 10, cmd2[3]);
1804                                         vst1q_u64(lmt_addr + 12, cmd1[3]);
1805                                         vst1q_u64(lmt_addr + 14, cmd3[3]);
1806                                 } else {
1807                                         vst1q_u64(lmt_addr, cmd0[2]);
1808                                         vst1q_u64(lmt_addr + 2, cmd2[2]);
1809                                         vst1q_u64(lmt_addr + 4, cmd1[2]);
1810                                         vst1q_u64(lmt_addr + 6, cmd0[3]);
1811                                         vst1q_u64(lmt_addr + 8, cmd2[3]);
1812                                         vst1q_u64(lmt_addr + 10, cmd1[3]);
1813                                 }
1814                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1815                         } while (lmt_status == 0);
1816                 } else {
1817                         do {
1818                                 vst1q_u64(lmt_addr, cmd0[0]);
1819                                 vst1q_u64(lmt_addr + 2, cmd1[0]);
1820                                 vst1q_u64(lmt_addr + 4, cmd0[1]);
1821                                 vst1q_u64(lmt_addr + 6, cmd1[1]);
1822                                 vst1q_u64(lmt_addr + 8, cmd0[2]);
1823                                 vst1q_u64(lmt_addr + 10, cmd1[2]);
1824                                 vst1q_u64(lmt_addr + 12, cmd0[3]);
1825                                 vst1q_u64(lmt_addr + 14, cmd1[3]);
1826                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1827                         } while (lmt_status == 0);
1828                 }
1829                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1830         }
1831
1832         if (unlikely(pkts_left)) {
1833                 if (flags & NIX_TX_MULTI_SEG_F)
1834                         pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
1835                                                         pkts_left, cmd, flags);
1836                 else
1837                         pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
1838                                                    cmd, flags);
1839         }
1840
1841         return pkts;
1842 }
1843
1844 #else
1845 static __rte_always_inline uint16_t
1846 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1847                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1848 {
1849         RTE_SET_USED(tx_queue);
1850         RTE_SET_USED(tx_pkts);
1851         RTE_SET_USED(pkts);
1852         RTE_SET_USED(cmd);
1853         RTE_SET_USED(flags);
1854         return 0;
1855 }
1856 #endif
1857
1858 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
1859 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
1860 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
1861 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
1862 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
1863 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
1864 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
1865
1866 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
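/* In T(name, sz, flags), sz is the number of 64-bit words the Tx command
 * needs: 4 for SEND_HDR + SG, 6 when an extension header is added (VLAN or
 * TSO) and 8 when the timestamp SEND_MEM pair is added, in line with
 * cn9k_nix_tx_ext_subs().
 */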
1867 #define NIX_TX_FASTPATH_MODES_0_15                                             \
1868         T(no_offload, 4, NIX_TX_OFFLOAD_NONE)                                  \
1869         T(l3l4csum, 4, L3L4CSUM_F)                                             \
1870         T(ol3ol4csum, 4, OL3OL4CSUM_F)                                         \
1871         T(ol3ol4csum_l3l4csum, 4, OL3OL4CSUM_F | L3L4CSUM_F)                   \
1872         T(vlan, 6, VLAN_F)                                                     \
1873         T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
1874         T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
1875         T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1876         T(noff, 4, NOFF_F)                                                     \
1877         T(noff_l3l4csum, 4, NOFF_F | L3L4CSUM_F)                               \
1878         T(noff_ol3ol4csum, 4, NOFF_F | OL3OL4CSUM_F)                           \
1879         T(noff_ol3ol4csum_l3l4csum, 4, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1880         T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
1881         T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
1882         T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
1883         T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
1884           NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1885
1886 #define NIX_TX_FASTPATH_MODES_16_31                                            \
1887         T(tso, 6, TSO_F)                                                       \
1888         T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
1889         T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
1890         T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
1891         T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
1892         T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
1893         T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
1894         T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
1895           TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1896         T(tso_noff, 6, TSO_F | NOFF_F)                                         \
1897         T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
1898         T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
1899         T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
1900           TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1901         T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
1902         T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
1903         T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1904         T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
1905           TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1906
1907 #define NIX_TX_FASTPATH_MODES_32_47                                            \
1908         T(ts, 8, TSP_F)                                                        \
1909         T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
1910         T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
1911         T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
1912         T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
1913         T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
1914         T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
1915         T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
1916           TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1917         T(ts_noff, 8, TSP_F | NOFF_F)                                          \
1918         T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
1919         T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
1920         T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
1921           TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1922         T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
1923         T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
1924         T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
1925         T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
1926           TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1927
1928 #define NIX_TX_FASTPATH_MODES_48_63                                            \
1929         T(ts_tso, 8, TSP_F | TSO_F)                                            \
1930         T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
1931         T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
1932         T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
1933           TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
1934         T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
1935         T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
1936         T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
1937         T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
1938           TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
1939         T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
1940         T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
1941         T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
1942         T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
1943           TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
1944         T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
1945         T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
1946           TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
1947         T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
1948           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
1949         T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
1950           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1951
1952 #define NIX_TX_FASTPATH_MODES_64_79                                            \
1953         T(sec, 4, T_SEC_F)                                                     \
1954         T(sec_l3l4csum, 4, T_SEC_F | L3L4CSUM_F)                               \
1955         T(sec_ol3ol4csum, 4, T_SEC_F | OL3OL4CSUM_F)                           \
1956         T(sec_ol3ol4csum_l3l4csum, 4, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1957         T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
1958         T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
1959         T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
1960         T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
1961           T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
1962         T(sec_noff, 4, T_SEC_F | NOFF_F)                                       \
1963         T(sec_noff_l3l4csum, 4, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
1964         T(sec_noff_ol3ol4csum, 4, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
1965         T(sec_noff_ol3ol4csum_l3l4csum, 4,                                     \
1966           T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
1967         T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
1968         T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
1969         T(sec_noff_vlan_ol3ol4csum, 6,                                         \
1970           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
1971         T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
1972           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1973
1974 #define NIX_TX_FASTPATH_MODES_80_95                                            \
1975         T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
1976         T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
1977         T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
1978         T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
1979           T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
1980         T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
1981         T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
1982         T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
1983         T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
1984           T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
1985         T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
1986         T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
1987         T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
1988         T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
1989           T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
1990         T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
1991         T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
1992           T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
1993         T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
1994           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
1995         T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
1996           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1997
1998 #define NIX_TX_FASTPATH_MODES_96_111                                           \
1999         T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2000         T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2001         T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2002         T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2003           T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2004         T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2005         T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2006         T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2007         T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2008           T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2009         T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2010         T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2011         T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2012         T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2013           T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2014         T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2015         T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2016           T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2017         T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2018           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2019         T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2020           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2021
2022 #define NIX_TX_FASTPATH_MODES_112_127                                          \
2023         T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2024         T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2025         T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2026         T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2027           T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2028         T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2029         T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2030           T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2031         T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2032           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2033         T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2034           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2035         T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2036         T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2037           T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2038         T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2039           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2040         T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2041           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2042         T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2043         T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2044           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2045         T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2046           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2047         T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2048           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2049                   L3L4CSUM_F)
2050
2051 #define NIX_TX_FASTPATH_MODES                                                  \
2052         NIX_TX_FASTPATH_MODES_0_15                                             \
2053         NIX_TX_FASTPATH_MODES_16_31                                            \
2054         NIX_TX_FASTPATH_MODES_32_47                                            \
2055         NIX_TX_FASTPATH_MODES_48_63                                            \
2056         NIX_TX_FASTPATH_MODES_64_79                                            \
2057         NIX_TX_FASTPATH_MODES_80_95                                            \
2058         NIX_TX_FASTPATH_MODES_96_111                                           \
2059         NIX_TX_FASTPATH_MODES_112_127
2060
2061 #define T(name, sz, flags)                                                     \
2062         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(           \
2063                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2064         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name(      \
2065                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2066         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \
2067                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2068         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \
2069                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2070
2071 NIX_TX_FASTPATH_MODES
2072 #undef T
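/* As an illustration, T(l3l4csum, 4, L3L4CSUM_F) above declares
 * cn9k_nix_xmit_pkts_l3l4csum, cn9k_nix_xmit_pkts_mseg_l3l4csum,
 * cn9k_nix_xmit_pkts_vec_l3l4csum and cn9k_nix_xmit_pkts_vec_mseg_l3l4csum,
 * all with the (void *, struct rte_mbuf **, uint16_t) prototype.
 */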
2073
2074 #define NIX_TX_XMIT(fn, sz, flags)                                             \
2075         uint16_t __rte_noinline __rte_hot fn(                                  \
2076                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2077         {                                                                      \
2078                 uint64_t cmd[sz];                                              \
2079                 /* For TSO inner checksum is a must */                         \
2080                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2081                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2082                         return 0;                                              \
2083                 return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,        \
2084                                           flags);                              \
2085         }
2086
2087 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
2088         uint16_t __rte_noinline __rte_hot fn(                                  \
2089                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2090         {                                                                      \
2091                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2092                 /* For TSO inner checksum is a must */                         \
2093                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2094                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2095                         return 0;                                              \
2096                 return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,   \
2097                                                (flags) | NIX_TX_MULTI_SEG_F);  \
2098         }
2099
2100 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
2101         uint16_t __rte_noinline __rte_hot fn(                                  \
2102                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2103         {                                                                      \
2104                 uint64_t cmd[sz];                                              \
2105                 /* For TSO inner checksum is a must */                         \
2106                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2107                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2108                         return 0;                                              \
2109                 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2110                                                  (flags));                     \
2111         }
2112
2113 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
2114         uint16_t __rte_noinline __rte_hot fn(                                  \
2115                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2116         {                                                                      \
2117                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2118                 /* For TSO inner checksum is a must */                         \
2119                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2120                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2121                         return 0;                                              \
2122                 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2123                                                  (flags) |                     \
2124                                                          NIX_TX_MULTI_SEG_F);  \
2125         }
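
/* These generators are expected to be expanded once per mode in the
 * fast-path C files, e.g. (illustrative):
 *   NIX_TX_XMIT(cn9k_nix_xmit_pkts_l3l4csum, 4, L3L4CSUM_F)
 * which emits the scalar variant declared above.
 */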
2126
2127 #endif /* __CN9K_TX_H__ */