/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN9K_TX_H__
#define __CN9K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flag to control the xmit_prepare function.
 * Defined from the MSB end so that it is not
 * mistaken for an offload flag used to pick the Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low; update fc_cache_pkts */                \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply by sqes_per_sqb to express in pkts */      \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)
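
/*
 * Worked example of the flow-control math above (hypothetical values):
 * with nb_sqb_bufs_adj = 1024, *fc_mem = 24 and sqes_per_sqb_log2 = 3,
 * fc_cache_pkts = (1024 - 24) << 3 = 8000, i.e. room for 8000 SQEs
 * before fc_mem has to be re-read.
 */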

/* Determine the number of Tx sub-descriptors required when the
 * extension sub-descriptor is enabled.
 */
static __rte_always_inline int
cn9k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                       ? 2
                       : ((flags &
                           (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
                                  ? 1
                                  : 0);
}
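
/*
 * The count maps onto the layout built by cn9k_nix_tx_skeleton() below:
 * TSTAMP needs SEND_EXT plus SEND_MEM (2 extra sub-descriptors),
 * VLAN/QINQ and TSO need SEND_EXT only (1), and the base case is just
 * SEND_HDR followed by SEND_SG (0 extra).
 */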

static __rte_always_inline void
cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
                     const uint16_t flags, const uint16_t static_sz)
{
        if (static_sz)
                cmd[0] = txq->send_hdr_w0;
        else
                cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
                         ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                        cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
                else
                        cmd[2] = NIX_SUBDC_EXT << 60;
                cmd[3] = 0;
                cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        } else {
                cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
}
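
/*
 * Resulting command layout (one uint64_t per slot): cmd[0..1] hold
 * SEND_HDR, cmd[2..3] hold SEND_EXT when NIX_TX_NEED_EXT_HDR is set,
 * and SEND_SG follows at cmd[4] (or at cmd[2] without the EXT header).
 * BIT_ULL(48) in the SG word corresponds to a segment count of 1, and
 * BIT_ULL(15) in the EXT word is only set together with
 * NIX_TX_OFFLOAD_TSTAMP_F to pre-enable timestamp capture.
 */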

static __rte_always_inline void
cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Payload length = packet length minus all LSO headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update outer UDP length for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
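
/*
 * Note on the (2 << is_ipv6) offsets above: the 16-bit length field sits
 * at byte offset 2 of an IPv4 header (Total Length) but at offset 4 of an
 * IPv6 header (Payload Length), so one expression locates both. Likewise
 * the outer UDP length field lives at byte offset 4 of the UDP header.
 */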

static __rte_always_inline void
cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                      const uint64_t lso_tun_fmt, uint8_t mark_flag,
                      uint64_t mark_fmt)
{
        uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;
        uint16_t mark_form = 0;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w0.mark_en = 0;
                send_hdr_ext->w1.u = 0;
                ol_flags = m->ol_flags;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         *
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Add 1 for IPv4 when csum is requested: type 3 is IPv4 + csum */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* When there is no tunnel (outer) header, shift the
                 * IL3/IL4 fields down into OL3/OL4 so the single
                 * header's checksums use the outer fields.
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));
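                /* With mask = 1 the IL type nibbles (upper half) move
                 * down 8 bits into the OL type positions, and the IL
                 * pointer bytes (lower half) move down 16 bits into the
                 * OL pointer positions; with mask = 0 nothing moves.
                 */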

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Add 1 for IPv4 when csum is requested: type 3 is IPv4 + csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Add 1 for IPv4 when csum is requested: type 3 is IPv4 + csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
                                                  RTE_MBUF_F_TX_IPV6));

                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
                /* Fill for VLAN marking only when VLAN insertion enabled */
                mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
                             (send_hdr_ext->w1.vlan1_ins_ena ||
                              send_hdr_ext->w1.vlan0_ins_ena));
                /* Mask requested flags with packet data information */
                mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
                mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

                mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
                mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
                markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

                send_hdr_ext->w0.mark_en = !!mark_off;
                send_hdr_ext->w0.markform = mark_form & 0x7F;
                send_hdr_ext->w0.markptr = markptr;
        }
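
        /* Marking recap (best-effort reading of the packed format):
         * mark_off picks the first marking requested in mark_flag
         * (VLAN DEI / IP ECN / IP DSCP) that this packet can carry,
         * mark_fmt holds one 16-bit entry per marking with separate
         * IPv4 and IPv6 bytes, and bit 7 of the selected byte adjusts
         * markptr relative to the end of the L2 header.
         */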

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }
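
        /* lso_tun_fmt packs eight 8-bit LSO format indices, one per
         * {UDP-tunnel, outer-IPv6, inner-IPv6} combination; the shift
         * computed above (32 * udp_tun + 16 * outer_v6 + 8 * inner_v6)
         * selects the matching byte.
         */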

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                        /* Ensuring mbuf fields which got updated in
                         * cnxk_nix_prefree_seg are written before LMTST.
                         */
                        rte_io_wmb();
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }
}

static __rte_always_inline void
cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
                             const uint64_t ol_flags, const uint16_t no_segdw,
                             const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                struct nix_send_mem_s *send_mem;
                uint16_t off = (no_segdw - 1) << 1;
                const uint8_t is_ol_tstamp =
                        !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);

                send_mem = (struct nix_send_mem_s *)(cmd + off);

                /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not
                 * set must not record a Tx timestamp, so change the alg
                 * type to NIX_SENDMEMALG_SUB and point the send mem
                 * address at the next 8 bytes to avoid corrupting the
                 * registered Tx timestamp address.
                 */
                send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
                send_mem->w0.cn9k.alg =
                        NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);

                send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
                                (is_ol_tstamp));
        }
}
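
/*
 * The (is_ol_tstamp << 3) trick above adds 8 to the SETTSTMP alg value to
 * reach the SUB algorithm, and the uint64_t pointer arithmetic on
 * txq->ts_mem redirects the DMA to a scratch dword 8 bytes past the real
 * timestamp slot, so packets without the flag never clobber it.
 */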

static __rte_always_inline void
cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
                  const uint32_t flags)
{
        uint64_t lmt_status;

        do {
                roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
}
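
/*
 * roc_lmt_submit_ldeor() returns 0 when the LMTST was not committed
 * atomically (e.g. the LMT state was cancelled by an intervening
 * exception or context switch), so the command is re-copied into the
 * LMT line and resubmitted until it sticks.
 */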

static __rte_always_inline void
cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
{
        roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
{
        return roc_lmt_submit_ldeor(io_addr);
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
{
        return roc_lmt_submit_ldeorl(io_addr);
}

static __rte_always_inline uint16_t
cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
                rte_io_wmb();
        }

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                        /* Commit changes to mbuf */
                        rte_io_wmb();
                }
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
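
/*
 * segdw example: a 3-segment packet with no EXT header consumes the SG
 * dword plus three pointers (4 dwords, rounded up to two 16 B units)
 * plus one unit for SEND_HDR, so segdw = 3 and sizem1 = 2.
 */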

static __rte_always_inline void
cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
{
        roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
                       uint16_t segdw)
{
        uint64_t lmt_status;

        do {
                roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
                               rte_iova_t io_addr, uint16_t segdw)
{
        uint64_t lmt_status;

        rte_io_wmb();
        do {
                roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
}

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                   uint64_t *cmd, const uint16_t flags)
{
        struct cn9k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint64_t lso_tun_fmt = 0, mark_fmt = 0;
        void *lmt_addr = txq->lmt_addr;
        uint8_t mark_flag = 0;
        uint16_t i;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        cn9k_nix_tx_skeleton(txq, cmd, flags, 1);

        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
                lso_tun_fmt = txq->lso_tun_fmt;

                for (i = 0; i < pkts; i++)
                        cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Commit any changes in the packets here. When fast free is
         * enabled (MBUF_NOFF_F clear) nothing further is written to
         * them; with MBUF_NOFF_F set, cnxk_nix_prefree_seg() still
         * updates mbufs later, so the barrier is deferred.
         */
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();

        for (i = 0; i < pkts; i++) {
                cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                      mark_flag, mark_fmt);
                cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
                                             flags);
                cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
        }

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        return pkts;
}
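
/*
 * Since `flags` is a compile-time constant in each generated variant of
 * this always-inline burst function, the offload branches above are
 * expected to fold away at build time, leaving only the code for the
 * variant's own offload set.
 */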

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
                        uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn9k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint64_t lso_tun_fmt = 0, mark_fmt = 0;
        void *lmt_addr = txq->lmt_addr;
        uint8_t mark_flag = 0;
        uint16_t segdw;
        uint64_t i;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        cn9k_nix_tx_skeleton(txq, cmd, flags, 1);

        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
                lso_tun_fmt = txq->lso_tun_fmt;

                for (i = 0; i < pkts; i++)
                        cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Commit any changes in the packets here. When fast free is
         * enabled (MBUF_NOFF_F clear) nothing further is written to
         * them; with MBUF_NOFF_F set, cnxk_nix_prefree_seg() still
         * updates mbufs later, so the barrier is deferred.
         */
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();

        for (i = 0; i < pkts; i++) {
                cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                      mark_flag, mark_fmt);
                segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
                cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
                                             segdw, flags);
                cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
        }

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        return pkts;
}

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
                     union nix_send_ext_w0_u *w0, uint64_t ol_flags,
                     uint64_t flags)
{
        uint16_t lso_sb;
        uint64_t mask;

        if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
                return;

        mask = -(!w1->il3type);
        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

        w0->u |= BIT(14);
        w0->lso_sb = lso_sb;
        w0->lso_mps = m->tso_segsz;
        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

        /* Handle tunnel tso */
        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                const uint8_t is_udp_tun =
                        (CNXK_NIX_UDP_TUN_BITMASK >>
                         ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                        0x1;

                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                /* Update format for UDP tunneled packet */
                w0->lso_format += is_udp_tun ? 2 : 6;

                w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
        }
}

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                               union nix_send_hdr_w0_u *sh,
                               union nix_send_sg_s *sg, const uint32_t flags)
{
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
        uint64_t segdw;
        int i = 1;

        sh->total = m->pkt_len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];

        sg_u = sg_u | ((uint64_t)m->data_len);

        nb_segs = m->nb_segs - 1;
        m_next = m->next;

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif

        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];

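        /* The +2 below covers the SEND_SG dword and the first segment
         * pointer, which live in the cmd1 vector rather than in the
         * scratch list `cmd`.
         */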
        segdw += 2;
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
                 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        sh->sizem1 = segdw - 1;

        return segdw;
}

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
                          uint64x2_t *cmd1, const uint32_t flags)
{
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;
        uint8_t ret;

        if (m->nb_segs == 1) {
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        sg.u = vgetq_lane_u64(cmd1[0], 0);
                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
                }

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                sg.u = vgetq_lane_u64(cmd1[0], 0);
                if (!(sg.u & (1ULL << 55)))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
                rte_io_wmb();
#endif
                return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
                       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        }

        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);

        ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
        return ret;
}

#define NIX_DESCS_PER_LOOP 4

static __rte_always_inline void
cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
                               uint64x2_t *cmd2, uint64x2_t *cmd3,
                               uint8_t *segdw,
                               uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
                               uint64_t *lmt_addr, rte_iova_t io_addr,
                               const uint32_t flags)
{
        uint64_t lmt_status;
        uint8_t j, off;

        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                /* None of the 4 consecutive packets is multi-segment. */
                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
                        do {
                                vst1q_u64(lmt_addr, cmd0[0]);
                                vst1q_u64(lmt_addr + 2, cmd1[0]);
                                vst1q_u64(lmt_addr + 4, cmd0[1]);
                                vst1q_u64(lmt_addr + 6, cmd1[1]);
                                vst1q_u64(lmt_addr + 8, cmd0[2]);
                                vst1q_u64(lmt_addr + 10, cmd1[2]);
                                vst1q_u64(lmt_addr + 12, cmd0[3]);
                                vst1q_u64(lmt_addr + 14, cmd1[3]);
                                lmt_status = roc_lmt_submit_ldeor(io_addr);
                        } while (lmt_status == 0);

                        return;
                }
        }

        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
                /* Fit consecutive packets in same LMTLINE. */
                if ((segdw[j] + segdw[j + 1]) <= 8) {
again0:
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 4;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);

                                vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
                                roc_lmt_mov_seg(lmt_addr + 14 + off,
                                                slist[j + 1], segdw[j + 1] - 4);
                                off += ((segdw[j + 1] - 4) << 1);
                                vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 3;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
                                vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
                                roc_lmt_mov_seg(lmt_addr + 12 + off,
                                                slist[j + 1], segdw[j + 1] - 3);
                        } else {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 2;
                                roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
                                vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
                                roc_lmt_mov_seg(lmt_addr + 8 + off,
                                                slist[j + 1], segdw[j + 1] - 2);
                        }
                        lmt_status = roc_lmt_submit_ldeor(io_addr);
                        if (lmt_status == 0)
                                goto again0;
                        j += 2;
                } else {
again1:
                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 4;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                                off <<= 1;
                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
                                vst1q_u64(lmt_addr + 4, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 3;
                                roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
                        } else {
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd1[j]);
                                /* Copy segs */
                                off = segdw[j] - 2;
                                roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
                        }
                        lmt_status = roc_lmt_submit_ldeor(io_addr);
                        if (lmt_status == 0)
                                goto again1;
                        j += 1;
                }
        }
}
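
/*
 * An LMTLINE holds 128 bytes (eight 16 B units), so a pair of packets
 * shares one line when segdw[j] + segdw[j + 1] <= 8; anything larger is
 * sent on a line of its own.
 */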

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                          uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
        uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
        uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
                cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
        uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
        uint64x2_t senddesc01_w0, senddesc23_w0;
        uint64x2_t senddesc01_w1, senddesc23_w1;
        uint64x2_t sendext01_w0, sendext23_w0;
        uint64x2_t sendext01_w1, sendext23_w1;
        uint64x2_t sendmem01_w0, sendmem23_w0;
        uint64x2_t sendmem01_w1, sendmem23_w1;
        uint64x2_t sgdesc01_w0, sgdesc23_w0;
        uint64x2_t sgdesc01_w1, sgdesc23_w1;
        struct cn9k_eth_txq *txq = tx_queue;
        uint64_t *lmt_addr = txq->lmt_addr;
        rte_iova_t io_addr = txq->io_addr;
        uint64x2_t ltypes01, ltypes23;
        uint64x2_t xtmp128, ytmp128;
        uint64x2_t xmask01, xmask23;
        uint64_t lmt_status, i;
        uint16_t pkts_left;

        NIX_XMIT_FC_OR_RETURN(txq, pkts);

        pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
        pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

        /* Reduce the cached count */
        txq->fc_cache_pkts -= pkts;

        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
                for (i = 0; i < pkts; i++)
                        cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }

        /* Commit any changes in the packets here. When fast free is
         * enabled (MBUF_NOFF_F clear) nothing further is written to
         * them; with MBUF_NOFF_F set, cnxk_nix_prefree_seg() still
         * updates mbufs later, so the barrier is deferred.
         */
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();

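        /* Each 128-bit vector below carries the same descriptor word for
         * a pair of packets: suffix 01 for packets 0/1, 23 for 2/3.
         */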
        senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
        senddesc23_w0 = senddesc01_w0;

        senddesc01_w1 = vdupq_n_u64(0);
        senddesc23_w1 = senddesc01_w1;
        sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
        sgdesc23_w0 = sgdesc01_w0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                        sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
                                                   BIT_ULL(15));
                        sendmem01_w0 =
                                vdupq_n_u64((NIX_SUBDC_MEM << 60) |
                                            (NIX_SENDMEMALG_SETTSTMP << 56));
                        sendmem23_w0 = sendmem01_w0;
                        sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
                        sendmem23_w1 = sendmem01_w1;
                } else {
                        sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
                }
                sendext23_w0 = sendext01_w0;

                if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
                        sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
                else
                        sendext01_w1 = vdupq_n_u64(0);
                sendext23_w1 = sendext01_w1;
        }

        for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
                /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
                senddesc01_w0 =
                        vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
                sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

                senddesc23_w0 = senddesc01_w0;
                sgdesc23_w0 = sgdesc01_w0;

                /* Clear vlan enables. */
                if (flags & NIX_TX_NEED_EXT_HDR) {
                        sendext01_w1 = vbicq_u64(sendext01_w1,
                                                 vdupq_n_u64(0x3FFFF00FFFF00));
                        sendext23_w1 = sendext01_w1;
                }

                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                        /* Reset send mem alg to SETTSTMP (from SUB) */
                        sendmem01_w0 = vbicq_u64(sendmem01_w0,
                                                 vdupq_n_u64(BIT_ULL(59)));
                        /* Reset send mem address to default. */
                        sendmem01_w1 =
                                vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
                        sendmem23_w0 = sendmem01_w0;
                        sendmem23_w1 = sendmem01_w1;
                }

                if (flags & NIX_TX_OFFLOAD_TSO_F) {
                        /* Clear the LSO enable bit. */
                        sendext01_w0 = vbicq_u64(sendext01_w0,
                                                 vdupq_n_u64(BIT_ULL(14)));
                        sendext23_w0 = sendext01_w0;
                }

                /* Move mbufs to iova */
                mbuf0 = (uint64_t *)tx_pkts[0];
                mbuf1 = (uint64_t *)tx_pkts[1];
                mbuf2 = (uint64_t *)tx_pkts[2];
                mbuf3 = (uint64_t *)tx_pkts[3];

                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, buf_iova));
                /*
                 * Get the mbufs' olflags, iova, pktlen and dataoff:
                 * dataoff_iovaX.D[0] = iova,
                 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
                 * len_olflagsX.D[0] = ol_flags,
                 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
                 */
                dataoff_iova0 = vld1q_u64(mbuf0);
                len_olflags0 = vld1q_u64(mbuf0 + 2);
                dataoff_iova1 = vld1q_u64(mbuf1);
                len_olflags1 = vld1q_u64(mbuf1 + 2);
                dataoff_iova2 = vld1q_u64(mbuf2);
                len_olflags2 = vld1q_u64(mbuf2 + 2);
                dataoff_iova3 = vld1q_u64(mbuf3);
                len_olflags3 = vld1q_u64(mbuf3 + 2);

                /* Move mbufs to point pool */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mbuf, pool) -
                                     offsetof(struct rte_mbuf, buf_iova));

                if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
                             NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                        /* Get tx_offload for ol2, ol3, l2, l3 lengths */
                        /*
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
                         */

                        asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf0 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
                                     : [a] "+w"(senddesc01_w1)
                                     : [in] "r"(mbuf1 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf2 + 2)
                                     : "memory");

                        asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
                                     : [b] "+w"(senddesc23_w1)
                                     : [in] "r"(mbuf3 + 2)
                                     : "memory");

                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                } else {
                        /* Get pool pointer alone */
                        mbuf0 = (uint64_t *)*mbuf0;
                        mbuf1 = (uint64_t *)*mbuf1;
                        mbuf2 = (uint64_t *)*mbuf2;
                        mbuf3 = (uint64_t *)*mbuf3;
                }

                const uint8x16_t shuf_mask2 = {
                        0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                        0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                };
                xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
                ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

                /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
                const uint64x2_t and_mask0 = {
                        0xFFFFFFFFFFFFFFFF,
                        0x000000000000FFFF,
                };

                dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
                dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
                dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
                dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

                /*
                 * Pick only 16 bits of pktlen present at bits 63:32
                 * and place them at bits 15:0.
                 */
                xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
                ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

                /* Add pairwise to get dataoff + iova in sgdesc_w1 */
                sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
                sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

                /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
                 * pktlen at 15:0 position.
                 */
                sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
                sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
                senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
                senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

                /* Move mbuf to point to pool_id. */
                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
                                     offsetof(struct rte_mempool, pool_id));
                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
                                     offsetof(struct rte_mempool, pool_id));
1133
1134                 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1135                     !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1136                         /*
1137                          * Lookup table to translate ol_flags to
1138                          * il3/il4 types. But we still use ol3/ol4 types in
1139                          * senddesc_w1 as only one header processing is enabled.
1140                          */
1141                         const uint8x16_t tbl = {
1142                                 /* [0-15] = il4type:il3type */
1143                                 0x04, /* none (IPv6 assumed) */
1144                                 0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1145                                 0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1146                                 0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1147                                 0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1148                                 0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1149                                 0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1150                                 0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1151                                 0x02, /* RTE_MBUF_F_TX_IPV4  */
1152                                 0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1153                                 0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1154                                 0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1155                                 0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1156                                 0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1157                                        * RTE_MBUF_F_TX_TCP_CKSUM
1158                                        */
1159                                 0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1160                                        * RTE_MBUF_F_TX_SCTP_CKSUM
1161                                        */
1162                                 0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1163                                        * RTE_MBUF_F_TX_UDP_CKSUM
1164                                        */
1165                         };
1166
1167                         /* Extract olflags to translate to iltypes */
1168                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1169                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1170
1171                         /*
1172                          * E(47):L3_LEN(9):L2_LEN(7+z)
1173                          * E(47):L3_LEN(9):L2_LEN(7+z)
1174                          */
1175                         senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1176                         senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1177
1178                         /* Move OLFLAGS bits 55:52 to 51:48
1179                          * with zeros preprended on the byte and rest
1180                          * don't care
1181                          */
1182                         xtmp128 = vshrq_n_u8(xtmp128, 4);
1183                         ytmp128 = vshrq_n_u8(ytmp128, 4);
1184                         /*
1185                          * E(48):L3_LEN(8):L2_LEN(z+7)
1186                          * E(48):L3_LEN(8):L2_LEN(z+7)
1187                          */
1188                         const int8x16_t tshft3 = {
1189                                 -1, 0, 8, 8, 8, 8, 8, 8,
1190                                 -1, 0, 8, 8, 8, 8, 8, 8,
1191                         };
1192
1193                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1194                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1195
1196                         /* Do the lookup */
1197                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1198                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1199
1200                         /* Pick only relevant fields i.e Bit 48:55 of iltype
1201                          * and place it in ol3/ol4type of senddesc_w1
1202                          */
1203                         const uint8x16_t shuf_mask0 = {
1204                                 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1205                                 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1206                         };
1207
1208                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1209                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1210
1211                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1212                          * a [E(32):E(16):OL3(8):OL2(8)]
1213                          * a = a + (a << 8)
1214                          * a [E(32):E(16):(OL3+OL2):OL2]
1215                          * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1216                          */
1217                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1218                                                  vshlq_n_u16(senddesc01_w1, 8));
1219                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1220                                                  vshlq_n_u16(senddesc23_w1, 8));
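                             /* e.g. OL2_LEN=14 and OL3_LEN=20 yield OL3PTR=14
                              * and OL4PTR=34, the byte offsets of the L3 and
                              * L4 headers from the start of the packet.
                              */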
1221
1222                         /* Move ltypes to senddesc*_w1 */
1223                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1224                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1225                 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1226                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1227                         /*
1228                          * Lookup table to translate ol_flags to
1229                          * ol3/ol4 types.
1230                          */
1231
1232                         const uint8x16_t tbl = {
1233                                 /* [0-15] = ol4type:ol3type */
1234                                 0x00, /* none */
1235                                 0x03, /* OUTER_IP_CKSUM */
1236                                 0x02, /* OUTER_IPV4 */
1237                                 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1238                                 0x04, /* OUTER_IPV6 */
1239                                 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1240                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1241                                 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1242                                        * OUTER_IP_CKSUM
1243                                        */
1244                                 0x00, /* OUTER_UDP_CKSUM */
1245                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1246                                 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1247                                 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1248                                        * OUTER_IP_CKSUM
1249                                        */
1250                                 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1251                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1252                                        * OUTER_IP_CKSUM
1253                                        */
1254                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1255                                        * OUTER_IPV4
1256                                        */
1257                                 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1258                                        * OUTER_IPV4 | OUTER_IP_CKSUM
1259                                        */
1260                         };
1261
1262                         /* Extract olflags to translate to iltypes */
1263                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1264                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1265
1266                         /*
1267                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1268                          * E(47):OL3_LEN(9):OL2_LEN(7+z)
1269                          */
1270                         const uint8x16_t shuf_mask5 = {
1271                                 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1272                                 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1273                         };
1274                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1275                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1276
1277                         /* Extract outer ol flags only */
1278                         const uint64x2_t o_cksum_mask = {
1279                                 0x1C00020000000000,
1280                                 0x1C00020000000000,
1281                         };
1282
1283                         xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1284                         ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1285
1286                         /* Copy OUTER_UDP_CKSUM from bit 41
1287                          * up to bit 61
1288                          */
1289
1290                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1291                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1292
1293                         /* Shift oltype by 2 to start nibble from BIT(56)
1294                          * instead of BIT(58)
1295                          */
1296                         xtmp128 = vshrq_n_u8(xtmp128, 2);
1297                         ytmp128 = vshrq_n_u8(ytmp128, 2);
1298                         /*
1299                          * E(48):L3_LEN(8):L2_LEN(z+7)
1300                          * E(48):L3_LEN(8):L2_LEN(z+7)
1301                          */
1302                         const int8x16_t tshft3 = {
1303                                 -1, 0, 8, 8, 8, 8, 8, 8,
1304                                 -1, 0, 8, 8, 8, 8, 8, 8,
1305                         };
1306
1307                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1308                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1309
1310                         /* Do the lookup */
1311                         ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1312                         ltypes23 = vqtbl1q_u8(tbl, ytmp128);
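                             /* Byte 7 indexed tbl: bits 61:58 (OUTER_UDP_CKSUM
                              * copied up from bit 41, OUTER_IPV6, OUTER_IPV4,
                              * OUTER_IP_CKSUM) were shifted down by 2 above to
                              * form the nibble.
                              */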
1313
1314                         /* Pick only the relevant field, i.e. bits 56:63
1315                          * (oltype), and place it in ol3/ol4type of senddesc_w1
1316                          */
1317                         const uint8x16_t shuf_mask0 = {
1318                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1319                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1320                         };
1321
1322                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1323                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1324
1325                         /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1326                          * a [E(32):E(16):OL3(8):OL2(8)]
1327                          * a = a + (a << 8)
1328                          * a [E(32):E(16):(OL3+OL2):OL2]
1329                          * => E(32):E(16):OL4PTR(8):OL3PTR(8)
1330                          */
1331                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1332                                                  vshlq_n_u16(senddesc01_w1, 8));
1333                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1334                                                  vshlq_n_u16(senddesc23_w1, 8));
1335
1336                         /* Move ltypes to senddesc*_w1 */
1337                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1338                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1339                 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1340                            (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1341                         /* Lookup table to translate ol_flags to
1342                          * ol4type, ol3type, il4type, il3type of senddesc_w1
1343                          */
1344                         const uint8x16x2_t tbl = {{
1345                                 {
1346                                         /* [0-15] = il4type:il3type */
1347                                         0x04, /* none (IPv6) */
1348                                         0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1349                                         0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1350                                         0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1351                                         0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1352                                         0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1353                                                * RTE_MBUF_F_TX_TCP_CKSUM
1354                                                */
1355                                         0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
1356                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1357                                                */
1358                                         0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
1359                                                * RTE_MBUF_F_TX_UDP_CKSUM
1360                                                */
1361                                         0x02, /* RTE_MBUF_F_TX_IPV4 */
1362                                         0x12, /* RTE_MBUF_F_TX_IPV4 |
1363                                                * RTE_MBUF_F_TX_TCP_CKSUM
1364                                                */
1365                                         0x22, /* RTE_MBUF_F_TX_IPV4 |
1366                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1367                                                */
1368                                         0x32, /* RTE_MBUF_F_TX_IPV4 |
1369                                                * RTE_MBUF_F_TX_UDP_CKSUM
1370                                                */
1371                                         0x03, /* RTE_MBUF_F_TX_IPV4 |
1372                                                * RTE_MBUF_F_TX_IP_CKSUM
1373                                                */
1374                                         0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1375                                                * RTE_MBUF_F_TX_TCP_CKSUM
1376                                                */
1377                                         0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1378                                                * RTE_MBUF_F_TX_SCTP_CKSUM
1379                                                */
1380                                         0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1381                                                * RTE_MBUF_F_TX_UDP_CKSUM
1382                                                */
1383                                 },
1384
1385                                 {
1386                                         /* [16-31] = ol4type:ol3type */
1387                                         0x00, /* none */
1388                                         0x03, /* OUTER_IP_CKSUM */
1389                                         0x02, /* OUTER_IPV4 */
1390                                         0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1391                                         0x04, /* OUTER_IPV6 */
1392                                         0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1393                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1394                                         0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1395                                                * OUTER_IP_CKSUM
1396                                                */
1397                                         0x00, /* OUTER_UDP_CKSUM */
1398                                         0x33, /* OUTER_UDP_CKSUM |
1399                                                * OUTER_IP_CKSUM
1400                                                */
1401                                         0x32, /* OUTER_UDP_CKSUM |
1402                                                * OUTER_IPV4
1403                                                */
1404                                         0x33, /* OUTER_UDP_CKSUM |
1405                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1406                                                */
1407                                         0x34, /* OUTER_UDP_CKSUM |
1408                                                * OUTER_IPV6
1409                                                */
1410                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1411                                                * OUTER_IP_CKSUM
1412                                                */
1413                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1414                                                * OUTER_IPV4
1415                                                */
1416                                         0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1417                                                * OUTER_IPV4 | OUTER_IP_CKSUM
1418                                                */
1419                                 },
1420                         }};
1421
1422                         /* Extract olflags to translate to oltype & iltype */
1423                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1424                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1425
1426                         /*
1427                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1428                          * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1429                          */
1430                         const uint32x4_t tshft_4 = {
1431                                 1,
1432                                 0,
1433                                 1,
1434                                 0,
1435                         };
1436                         senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1437                         senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1438
1439                         /*
1440                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1441                          * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1442                          */
1443                         const uint8x16_t shuf_mask5 = {
1444                                 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1445                                 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1446                         };
1447                         senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1448                         senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1449
1450                         /* Extract outer and inner header ol_flags */
1451                         const uint64x2_t oi_cksum_mask = {
1452                                 0x1CF0020000000000,
1453                                 0x1CF0020000000000,
1454                         };
1455
1456                         xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1457                         ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1458
1459                         /* Copy OUTER_UDP_CKSUM from bit 41
1460                          * up to bit 61
1461                          */
1462
1463                         xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1464                         ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1465
1466                         /* Shift right oltype by 2 and iltype by 4
1467                          * to start oltype nibble from BIT(56)
1468                          * instead of BIT(58) and iltype nibble from BIT(48)
1469                          * instead of BIT(52).
1470                          */
1471                         const int8x16_t tshft5 = {
1472                                 8, 8, 8, 8, 8, 8, -4, -2,
1473                                 8, 8, 8, 8, 8, 8, -4, -2,
1474                         };
1475
1476                         xtmp128 = vshlq_u8(xtmp128, tshft5);
1477                         ytmp128 = vshlq_u8(ytmp128, tshft5);
1478                         /*
1479                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1480                          * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1481                          */
1482                         const int8x16_t tshft3 = {
1483                                 -1, 0, -1, 0, 0, 0, 0, 0,
1484                                 -1, 0, -1, 0, 0, 0, 0, 0,
1485                         };
1486
1487                         senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1488                         senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1489
1490                         /* Mark Bit(4) of oltype */
1491                         const uint64x2_t oi_cksum_mask2 = {
1492                                 0x1000000000000000,
1493                                 0x1000000000000000,
1494                         };
1495
1496                         xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1497                         ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1498
1499                         /* Do the lookup */
1500                         ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1501                         ltypes23 = vqtbl2q_u8(tbl, ytmp128);
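                             /* vqtbl2q_u8 treats tbl as one 32-entry table:
                              * the iltype byte (indices 0-15) selects from the
                              * first half, and the oltype byte, with BIT(4) set
                              * above, from the second.
                              */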
1502
1503                         /* Pick only the relevant fields, i.e. bits 48:55
1504                          * (iltype) and bits 56:63 (oltype), and place them
1505                          * in the corresponding fields of senddesc_w1.
1506                          */
1507                         const uint8x16_t shuf_mask0 = {
1508                                 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1509                                 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1510                         };
1511
1512                         ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1513                         ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1514
1515                         /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1516                          * l3len, l2len, ol3len, ol2len.
1517                          * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1518                          * a = a + (a << 8)
1519                          * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1520                          * a = a + (a << 16)
1521                          * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1522                          * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1523                          */
1524                         senddesc01_w1 = vaddq_u8(senddesc01_w1,
1525                                                  vshlq_n_u32(senddesc01_w1, 8));
1526                         senddesc23_w1 = vaddq_u8(senddesc23_w1,
1527                                                  vshlq_n_u32(senddesc23_w1, 8));
1528
1529                         /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1530                         senddesc01_w1 = vaddq_u8(
1531                                 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1532                         senddesc23_w1 = vaddq_u8(
1533                                 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
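                             /* e.g. OL2=14, OL3=20, L2=8, L3=40 yield
                              * OL3PTR=14, OL4PTR=34, IL3PTR=42, IL4PTR=82.
                              */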
1534
1535                         /* Move ltypes to senddesc*_w1 */
1536                         senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1537                         senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1538                 }
1539
1540                 xmask01 = vdupq_n_u64(0);
1541                 xmask23 = xmask01;
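                     /* Gather a halfword from each mbuf (mbuf0..3 point at a
                      * 16-bit mbuf field prepared earlier in the loop) into
                      * lanes 0/4; the shift by 20 below lands it at SEND HDR
                      * W0 bits 39:20, the aura field going by the
                      * NIX_SEND_HDR_S layout.
                      */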
1542                 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1543                              : [a] "+w"(xmask01)
1544                              : [in] "r"(mbuf0)
1545                              : "memory");
1546
1547                 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1548                              : [a] "+w"(xmask01)
1549                              : [in] "r"(mbuf1)
1550                              : "memory");
1551
1552                 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1553                              : [b] "+w"(xmask23)
1554                              : [in] "r"(mbuf2)
1555                              : "memory");
1556
1557                 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1558                              : [b] "+w"(xmask23)
1559                              : [in] "r"(mbuf3)
1560                              : "memory");
1561                 xmask01 = vshlq_n_u64(xmask01, 20);
1562                 xmask23 = vshlq_n_u64(xmask23, 20);
1563
1564                 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1565                 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1566
1567                 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1568                         /* Tx ol_flag for vlan. */
1569                         const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
1570                         /* Bit enable for VLAN1 */
1571                         const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1572                         /* Tx ol_flag for QinQ. */
1573                         const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
1574                         /* Bit enable for VLAN0 */
1575                         const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1576                         /* Load VLAN values from packets; outer is VLAN0 */
1577                         uint64x2_t ext01 = {
1578                                 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1579                                         ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1580                                 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1581                                         ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1582                         };
1583                         uint64x2_t ext23 = {
1584                                 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1585                                         ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1586                                 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1587                                         ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1588                         };
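                             /* TCIs land in SEND EXT W1, assuming the usual
                              * NIX_SEND_EXT_S layout: VLAN0 TCI at bits 23:8,
                              * VLAN1 TCI at bits 47:32, matching the enable
                              * bits 48/49 above.
                              */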
1589
1590                         /* Get ol_flags of the packets. */
1591                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1592                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1593
1594                         /* ORR vlan outer/inner values into cmd. */
1595                         sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1596                         sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1597
1598                         /* Test for offload enable bits and generate masks. */
1599                         xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1600                                                       mlv),
1601                                             vandq_u64(vtstq_u64(xtmp128, olq),
1602                                                       mlq));
1603                         ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1604                                                       mlv),
1605                                             vandq_u64(vtstq_u64(ytmp128, olq),
1606                                                       mlq));
1607
1608                         /* Set vlan enable bits into cmd based on mask. */
1609                         sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1610                         sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1611                 }
1612
1613                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1614                         /* Tx ol_flag for timestamp. */
1615                         const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
1616                                                 RTE_MBUF_F_TX_IEEE1588_TMST};
1617                         /* Set send mem alg to SUB. */
1618                         const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1619                         /* Increment send mem address by 8. */
1620                         const uint64x2_t addr = {0x8, 0x8};
1621
1622                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1623                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1624
1625                         /* Check if a timestamp is requested and generate an
1626                          * inverted mask; the default cmd already covers the
1627                          * timestamp case, so only disabled packets need patching.
1628                          */
1629                         xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1630                         ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1631
1632                         /* Change send mem address to an 8-byte offset when
1633                          * TSTMP is disabled.
1634                          */
1635                         sendmem01_w1 = vaddq_u64(sendmem01_w1,
1636                                                  vandq_u64(xtmp128, addr));
1637                         sendmem23_w1 = vaddq_u64(sendmem23_w1,
1638                                                  vandq_u64(ytmp128, addr));
1639                         /* Change send mem alg to SUB when TSTMP is disabled. */
1640                         sendmem01_w0 = vorrq_u64(sendmem01_w0,
1641                                                  vandq_u64(xtmp128, alg));
1642                         sendmem23_w0 = vorrq_u64(sendmem23_w0,
1643                                                  vandq_u64(ytmp128, alg));
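                             /* Net effect: packets without the TMST flag get
                              * their SEND MEM op retargeted to an 8B scratch
                              * offset with the SUB alg, leaving the live
                              * timestamp slot untouched.
                              */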
1644
1645                         cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1646                         cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1647                         cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1648                         cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1649                 }
1650
1651                 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1652                         uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1653                         uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1654
1655                         /* Extract SD W1 as we need to set L4 types. */
1656                         vst1q_u64(sd_w1, senddesc01_w1);
1657                         vst1q_u64(sd_w1 + 2, senddesc23_w1);
1658
1659                         /* Extract SX W0 as we need to set LSO fields. */
1660                         vst1q_u64(sx_w0, sendext01_w0);
1661                         vst1q_u64(sx_w0 + 2, sendext23_w0);
1662
1663                         /* Extract ol_flags. */
1664                         xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1665                         ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1666
1667                         /* Prepare individual mbufs. */
1668                         cn9k_nix_prepare_tso(tx_pkts[0],
1669                                 (union nix_send_hdr_w1_u *)&sd_w1[0],
1670                                 (union nix_send_ext_w0_u *)&sx_w0[0],
1671                                 vgetq_lane_u64(xtmp128, 0), flags);
1672
1673                         cn9k_nix_prepare_tso(tx_pkts[1],
1674                                 (union nix_send_hdr_w1_u *)&sd_w1[1],
1675                                 (union nix_send_ext_w0_u *)&sx_w0[1],
1676                                 vgetq_lane_u64(xtmp128, 1), flags);
1677
1678                         cn9k_nix_prepare_tso(tx_pkts[2],
1679                                 (union nix_send_hdr_w1_u *)&sd_w1[2],
1680                                 (union nix_send_ext_w0_u *)&sx_w0[2],
1681                                 vgetq_lane_u64(ytmp128, 0), flags);
1682
1683                         cn9k_nix_prepare_tso(tx_pkts[3],
1684                                 (union nix_send_hdr_w1_u *)&sd_w1[3],
1685                                 (union nix_send_ext_w0_u *)&sx_w0[3],
1686                                 vgetq_lane_u64(ytmp128, 1), flags);
1687
1688                         senddesc01_w1 = vld1q_u64(sd_w1);
1689                         senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1690
1691                         sendext01_w0 = vld1q_u64(sx_w0);
1692                         sendext23_w0 = vld1q_u64(sx_w0 + 2);
1693                 }
1694
1695                 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1696                     !(flags & NIX_TX_MULTI_SEG_F)) {
1697                         /* Set don't free bit if reference count > 1 */
1698                         xmask01 = vdupq_n_u64(0);
1699                         xmask23 = xmask01;
1700
1701                         /* Move mbufs to iova */
1702                         mbuf0 = (uint64_t *)tx_pkts[0];
1703                         mbuf1 = (uint64_t *)tx_pkts[1];
1704                         mbuf2 = (uint64_t *)tx_pkts[2];
1705                         mbuf3 = (uint64_t *)tx_pkts[3];
1706
1707                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1708                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
1709                         else
1710                                 RTE_MEMPOOL_CHECK_COOKIES(
1711                                         ((struct rte_mbuf *)mbuf0)->pool,
1712                                         (void **)&mbuf0, 1, 0);
1713
1714                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1715                                 xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
1716                         else
1717                                 RTE_MEMPOOL_CHECK_COOKIES(
1718                                         ((struct rte_mbuf *)mbuf1)->pool,
1719                                         (void **)&mbuf1, 1, 0);
1720
1721                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1722                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
1723                         else
1724                                 RTE_MEMPOOL_CHECK_COOKIES(
1725                                         ((struct rte_mbuf *)mbuf2)->pool,
1726                                         (void **)&mbuf2, 1, 0);
1727
1728                         if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1729                                 xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
1730                         else
1731                                 RTE_MEMPOOL_CHECK_COOKIES(
1732                                         ((struct rte_mbuf *)mbuf3)->pool,
1733                                         (void **)&mbuf3, 1, 0);
1734                         senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1735                         senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1736                         /* Ensure mbuf fields updated in cnxk_nix_prefree_seg
1737                          * are written out before the LMTST.
1738                          */
1739                         rte_io_wmb();
1740                 } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
1741                         /* Move mbufs to iova */
1742                         mbuf0 = (uint64_t *)tx_pkts[0];
1743                         mbuf1 = (uint64_t *)tx_pkts[1];
1744                         mbuf2 = (uint64_t *)tx_pkts[2];
1745                         mbuf3 = (uint64_t *)tx_pkts[3];
1746
1747                         /* Mark mempool object as "put" since
1748                          * it is freed by NIX
1749                          */
1750                         RTE_MEMPOOL_CHECK_COOKIES(
1751                                 ((struct rte_mbuf *)mbuf0)->pool,
1752                                 (void **)&mbuf0, 1, 0);
1753
1754                         RTE_MEMPOOL_CHECK_COOKIES(
1755                                 ((struct rte_mbuf *)mbuf1)->pool,
1756                                 (void **)&mbuf1, 1, 0);
1757
1758                         RTE_MEMPOOL_CHECK_COOKIES(
1759                                 ((struct rte_mbuf *)mbuf2)->pool,
1760                                 (void **)&mbuf2, 1, 0);
1761
1762                         RTE_MEMPOOL_CHECK_COOKIES(
1763                                 ((struct rte_mbuf *)mbuf3)->pool,
1764                                 (void **)&mbuf3, 1, 0);
1765 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1766                         rte_io_wmb();
1767 #endif
1768                 }
1769
1770                 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1771                 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1772                 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1773                 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1774                 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1775
1776                 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1777                 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1778                 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1779                 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1780
1781                 if (flags & NIX_TX_NEED_EXT_HDR) {
1782                         cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1783                         cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1784                         cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1785                         cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1786                 }
1787
1788                 if (flags & NIX_TX_MULTI_SEG_F) {
1789                         uint64_t seg_list[NIX_DESCS_PER_LOOP]
1790                                          [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
1791                         uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
1792
1793                         /* Build mseg list for each packet individually. */
1794                         for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1795                                 segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
1796                                                         seg_list[j], &cmd0[j],
1797                                                         &cmd1[j], flags);
1798                         segdw[4] = 8;
1799
1800                         /* Commit all changes to mbuf before LMTST. */
1801                         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1802                                 rte_io_wmb();
1803
1804                         cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
1805                                                        segdw, seg_list,
1806                                                        lmt_addr, io_addr,
1807                                                        flags);
1808                 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1809                         /* With the ext header in the command we can no longer
1810                          * send all 4 packets together since the LMTLINE is
1811                          * 128 bytes. Split and Tx twice.
1812                          */
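                              /* Store order per packet: HDR (cmd0), EXT (cmd2),
                               * SG (cmd1) and, with timestamping, MEM (cmd3);
                               * two packets per 128B LMTLINE.
                               */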
1813                         do {
1814                                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1815                                         vst1q_u64(lmt_addr, cmd0[0]);
1816                                         vst1q_u64(lmt_addr + 2, cmd2[0]);
1817                                         vst1q_u64(lmt_addr + 4, cmd1[0]);
1818                                         vst1q_u64(lmt_addr + 6, cmd3[0]);
1819                                         vst1q_u64(lmt_addr + 8, cmd0[1]);
1820                                         vst1q_u64(lmt_addr + 10, cmd2[1]);
1821                                         vst1q_u64(lmt_addr + 12, cmd1[1]);
1822                                         vst1q_u64(lmt_addr + 14, cmd3[1]);
1823                                 } else {
1824                                         vst1q_u64(lmt_addr, cmd0[0]);
1825                                         vst1q_u64(lmt_addr + 2, cmd2[0]);
1826                                         vst1q_u64(lmt_addr + 4, cmd1[0]);
1827                                         vst1q_u64(lmt_addr + 6, cmd0[1]);
1828                                         vst1q_u64(lmt_addr + 8, cmd2[1]);
1829                                         vst1q_u64(lmt_addr + 10, cmd1[1]);
1830                                 }
1831                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1832                         } while (lmt_status == 0);
1833
1834                         do {
1835                                 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1836                                         vst1q_u64(lmt_addr, cmd0[2]);
1837                                         vst1q_u64(lmt_addr + 2, cmd2[2]);
1838                                         vst1q_u64(lmt_addr + 4, cmd1[2]);
1839                                         vst1q_u64(lmt_addr + 6, cmd3[2]);
1840                                         vst1q_u64(lmt_addr + 8, cmd0[3]);
1841                                         vst1q_u64(lmt_addr + 10, cmd2[3]);
1842                                         vst1q_u64(lmt_addr + 12, cmd1[3]);
1843                                         vst1q_u64(lmt_addr + 14, cmd3[3]);
1844                                 } else {
1845                                         vst1q_u64(lmt_addr, cmd0[2]);
1846                                         vst1q_u64(lmt_addr + 2, cmd2[2]);
1847                                         vst1q_u64(lmt_addr + 4, cmd1[2]);
1848                                         vst1q_u64(lmt_addr + 6, cmd0[3]);
1849                                         vst1q_u64(lmt_addr + 8, cmd2[3]);
1850                                         vst1q_u64(lmt_addr + 10, cmd1[3]);
1851                                 }
1852                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1853                         } while (lmt_status == 0);
1854                 } else {
1855                         do {
1856                                 vst1q_u64(lmt_addr, cmd0[0]);
1857                                 vst1q_u64(lmt_addr + 2, cmd1[0]);
1858                                 vst1q_u64(lmt_addr + 4, cmd0[1]);
1859                                 vst1q_u64(lmt_addr + 6, cmd1[1]);
1860                                 vst1q_u64(lmt_addr + 8, cmd0[2]);
1861                                 vst1q_u64(lmt_addr + 10, cmd1[2]);
1862                                 vst1q_u64(lmt_addr + 12, cmd0[3]);
1863                                 vst1q_u64(lmt_addr + 14, cmd1[3]);
1864                                 lmt_status = roc_lmt_submit_ldeor(io_addr);
1865                         } while (lmt_status == 0);
1866                 }
1867                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1868         }
1869
1870         if (unlikely(pkts_left)) {
1871                 if (flags & NIX_TX_MULTI_SEG_F)
1872                         pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
1873                                                         pkts_left, cmd, flags);
1874                 else
1875                         pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
1876                                                    cmd, flags);
1877         }
1878
1879         return pkts;
1880 }
1881
1882 #else
1883 static __rte_always_inline uint16_t
1884 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1885                           uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1886 {
1887         RTE_SET_USED(tx_queue);
1888         RTE_SET_USED(tx_pkts);
1889         RTE_SET_USED(pkts);
1890         RTE_SET_USED(cmd);
1891         RTE_SET_USED(flags);
1892         return 0;
1893 }
1894 #endif
1895
1896 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
1897 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
1898 #define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
1899 #define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
1900 #define TSO_F        NIX_TX_OFFLOAD_TSO_F
1901 #define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
1902 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
1903
1904 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
1905 #define NIX_TX_FASTPATH_MODES_0_15                                             \
1906         T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
1907         T(l3l4csum, 6, L3L4CSUM_F)                                             \
1908         T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
1909         T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
1910         T(vlan, 6, VLAN_F)                                                     \
1911         T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
1912         T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
1913         T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1914         T(noff, 6, NOFF_F)                                                     \
1915         T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
1916         T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
1917         T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1918         T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
1919         T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
1920         T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
1921         T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
1922           NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1923
1924 #define NIX_TX_FASTPATH_MODES_16_31                                            \
1925         T(tso, 6, TSO_F)                                                       \
1926         T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
1927         T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
1928         T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
1929         T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
1930         T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
1931         T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
1932         T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
1933           TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1934         T(tso_noff, 6, TSO_F | NOFF_F)                                         \
1935         T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
1936         T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
1937         T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
1938           TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1939         T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
1940         T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
1941         T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1942         T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
1943           TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1944
1945 #define NIX_TX_FASTPATH_MODES_32_47                                            \
1946         T(ts, 8, TSP_F)                                                        \
1947         T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
1948         T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
1949         T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
1950         T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
1951         T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
1952         T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
1953         T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
1954           TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1955         T(ts_noff, 8, TSP_F | NOFF_F)                                          \
1956         T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
1957         T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
1958         T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
1959           TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
1960         T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
1961         T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
1962         T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
1963         T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
1964           TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1965
1966 #define NIX_TX_FASTPATH_MODES_48_63                                            \
1967         T(ts_tso, 8, TSP_F | TSO_F)                                            \
1968         T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
1969         T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
1970         T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
1971           TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
1972         T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
1973         T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
1974         T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
1975         T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
1976           TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
1977         T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
1978         T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
1979         T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
1980         T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
1981           TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
1982         T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
1983         T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
1984           TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
1985         T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
1986           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
1987         T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
1988           TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
1989
1990 #define NIX_TX_FASTPATH_MODES_64_79                                            \
1991         T(sec, 6, T_SEC_F)                                                     \
1992         T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
1993         T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
1994         T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
1995         T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
1996         T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
1997         T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
1998         T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
1999           T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2000         T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
2001         T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
2002         T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
2003         T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
2004           T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2005         T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
2006         T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
2007         T(sec_noff_vlan_ol3ol4csum, 6,                                         \
2008           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
2009         T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2010           T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2011
2012 #define NIX_TX_FASTPATH_MODES_80_95                                            \
2013         T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
2014         T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
2015         T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
2016         T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
2017           T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2018         T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
2019         T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
2020         T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2021         T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
2022           T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2023         T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
2024         T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
2025         T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2026         T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
2027           T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2028         T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
2029         T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
2030           T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2031         T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
2032           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2033         T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
2034           T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2035
2036 #define NIX_TX_FASTPATH_MODES_96_111                                           \
2037         T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2038         T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2039         T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2040         T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2041           T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2042         T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2043         T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2044         T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2045         T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2046           T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2047         T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2048         T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2049         T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2050         T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2051           T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2052         T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2053         T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2054           T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2055         T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2056           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2057         T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2058           T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2059
2060 #define NIX_TX_FASTPATH_MODES_112_127                                          \
2061         T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2062         T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2063         T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2064         T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2065           T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2066         T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2067         T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2068           T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2069         T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2070           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2071         T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2072           T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2073         T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2074         T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2075           T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2076         T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2077           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2078         T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2079           T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2080         T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2081         T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2082           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2083         T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2084           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2085         T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2086           T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2087                   L3L4CSUM_F)
2088
2089 #define NIX_TX_FASTPATH_MODES                                                  \
2090         NIX_TX_FASTPATH_MODES_0_15                                             \
2091         NIX_TX_FASTPATH_MODES_16_31                                            \
2092         NIX_TX_FASTPATH_MODES_32_47                                            \
2093         NIX_TX_FASTPATH_MODES_48_63                                            \
2094         NIX_TX_FASTPATH_MODES_64_79                                            \
2095         NIX_TX_FASTPATH_MODES_80_95                                            \
2096         NIX_TX_FASTPATH_MODES_96_111                                           \
2097         NIX_TX_FASTPATH_MODES_112_127
2098
2099 #define T(name, sz, flags)                                                     \
2100         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(           \
2101                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2102         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name(      \
2103                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2104         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \
2105                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2106         uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \
2107                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2108
2109 NIX_TX_FASTPATH_MODES
2110 #undef T
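 /* For reference, a single entry such as T(no_offload, 6, NIX_TX_OFFLOAD_NONE)
  * declares four prototypes via the macro above:
  *   uint16_t cn9k_nix_xmit_pkts_no_offload(void *, struct rte_mbuf **, uint16_t);
  * plus the cn9k_nix_xmit_pkts_mseg_no_offload, _vec_no_offload and
  * _vec_mseg_no_offload variants with the same signature.
  */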
2111
2112 #define NIX_TX_XMIT(fn, sz, flags)                                             \
2113         uint16_t __rte_noinline __rte_hot fn(                                  \
2114                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2115         {                                                                      \
2116                 uint64_t cmd[sz];                                              \
2117                 /* For TSO, inner checksum is a must */                    \
2118                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2119                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2120                         return 0;                                              \
2121                 return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,        \
2122                                           flags);                              \
2123         }
2124
2125 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
2126         uint16_t __rte_noinline __rte_hot fn(                                  \
2127                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2128         {                                                                      \
2129                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2130                 /* For TSO, inner checksum is a must */                    \
2131                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2132                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2133                         return 0;                                              \
2134                 return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,   \
2135                                                (flags) | NIX_TX_MULTI_SEG_F);  \
2136         }
2137
2138 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
2139         uint16_t __rte_noinline __rte_hot fn(                                  \
2140                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2141         {                                                                      \
2142                 uint64_t cmd[sz];                                              \
2143                 /* For TSO, inner checksum is a must */                    \
2144                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2145                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2146                         return 0;                                              \
2147                 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2148                                                  (flags));                     \
2149         }
2150
2151 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
2152         uint16_t __rte_noinline __rte_hot fn(                                  \
2153                 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2154         {                                                                      \
2155                 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2156                 /* For TSO, inner checksum is a must */                    \
2157                 if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2158                     !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2159                         return 0;                                              \
2160                 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2161                                                  (flags) |                     \
2162                                                          NIX_TX_MULTI_SEG_F);  \
2163         }
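
 /* A minimal sketch of how the NIX_TX_XMIT* definition macros are meant to be
  * instantiated from a fast-path .c file (the exact file split is an
  * assumption):
  *
  *   #define T(name, sz, flags) NIX_TX_XMIT(cn9k_nix_xmit_pkts_##name, sz, flags)
  *   NIX_TX_FASTPATH_MODES
  *   #undef T
  */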
2164
2165 #endif /* __CN9K_TX_H__ */