1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
9 #define NIX_TX_OFFLOAD_NONE (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
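/* The offload flags above are OR'ed together per queue to pick a fast
 * path variant; e.g. a queue doing VLAN insertion plus inner L3/L4
 * checksum would use (an illustrative combination):
 *
 *   NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_L3_L4_CSUM_F
 *
 * which maps to the vlan_l3l4csum entry of NIX_TX_FASTPATH_MODES at the
 * end of this file.
 */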
17 /* Flags to control the xmit_prepare function.
18 * Defined from the backwards (MSB) end to denote that they are
19 * not used as offload flags to pick the fast path function
21 #define NIX_TX_MULTI_SEG_F BIT(15)
23 #define NIX_TX_NEED_SEND_HDR_W1 \
24 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
25 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
27 #define NIX_TX_NEED_EXT_HDR \
28 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
31 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
33 /* Cached value is low, update the fc_cache_pkts */
34 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
35 /* Multiply with sqe_per_sqb to express in pkts */ \
36 (txq)->fc_cache_pkts = \
37 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
38 << (txq)->sqes_per_sqb_log2; \
39 /* Check again for room */
40 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
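/* Worked example for NIX_XMIT_FC_OR_RETURN above (illustrative numbers
 * only): with nb_sqb_bufs_adj = 1024, *fc_mem = 1000 (SQBs in use as
 * reported by HW) and sqes_per_sqb_log2 = 5 (32 SQEs per SQB), the
 * refreshed cache is (1024 - 1000) << 5 = 768 packets worth of room.
 */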
45 /* Function to determine the number of Tx sub-descriptors required
46 * when the extension sub-descriptor is enabled.
48 static __rte_always_inline int
49 cn9k_nix_tx_ext_subs(const uint16_t flags)
51 return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
54 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
59 static __rte_always_inline void
60 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
62 uint64_t mask, ol_flags = m->ol_flags;
64 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
65 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
66 uint16_t *iplen, *oiplen, *oudplen;
67 uint16_t lso_sb, paylen;
69 mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
70 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
71 m->l2_len + m->l3_len + m->l4_len;
73 /* Derive payload length by removing the base headers */
74 paylen = m->pkt_len - lso_sb;
76 /* Get iplen position assuming no tunnel hdr */
77 iplen = (uint16_t *)(mdata + m->l2_len +
78 (2 << !!(ol_flags & PKT_TX_IPV6)));
79 /* Handle tunnel tso */
80 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
81 (ol_flags & PKT_TX_TUNNEL_MASK)) {
82 const uint8_t is_udp_tun =
83 (CNXK_NIX_UDP_TUN_BITMASK >>
84 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
87 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
90 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
93 /* Update format for UDP tunneled packet */
95 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
97 *oudplen = rte_cpu_to_be_16(
98 rte_be_to_cpu_16(*oudplen) - paylen);
101 /* Update iplen position to inner ip hdr */
102 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
104 (2 << !!(ol_flags & PKT_TX_IPV6)));
107 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
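/* Illustrative arithmetic for the above (assumed: plain TCPv4, no
 * tunnel): with l2_len = 14, l3_len = 20 and l4_len = 20, lso_sb = 54;
 * for a 1514 byte packet paylen = 1514 - 54 = 1460, which is subtracted
 * from the IP total-length field so that HW stamps the correct length
 * into each transmitted segment.
 */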
111 static __rte_always_inline void
112 cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
113 const uint64_t lso_tun_fmt)
115 struct nix_send_ext_s *send_hdr_ext;
116 struct nix_send_hdr_s *send_hdr;
117 uint64_t ol_flags = 0, mask;
118 union nix_send_hdr_w1_u w1;
119 union nix_send_sg_s *sg;
121 send_hdr = (struct nix_send_hdr_s *)cmd;
122 if (flags & NIX_TX_NEED_EXT_HDR) {
123 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
124 sg = (union nix_send_sg_s *)(cmd + 4);
125 /* Clear previous markings */
126 send_hdr_ext->w0.lso = 0;
127 send_hdr_ext->w1.u = 0;
129 sg = (union nix_send_sg_s *)(cmd + 2);
132 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
133 ol_flags = m->ol_flags;
137 if (!(flags & NIX_TX_MULTI_SEG_F)) {
138 send_hdr->w0.total = m->data_len;
140 roc_npa_aura_handle_to_aura(m->pool->pool_id);
145 * 3 => IPV4 with csum
147 * L3type and L3ptr need to be set for either
148 * L3 csum, L4 csum or LSO
152 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
153 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
154 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
155 const uint8_t ol3type =
156 ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
157 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
158 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
161 w1.ol3type = ol3type;
162 mask = 0xffffull << ((!!ol3type) << 4);
163 w1.ol3ptr = ~mask & m->outer_l2_len;
164 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
167 w1.ol4type = csum + (csum << 1);
170 w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
171 ((!!(ol_flags & PKT_TX_IPV6)) << 2);
172 w1.il3ptr = w1.ol4ptr + m->l2_len;
173 w1.il4ptr = w1.il3ptr + m->l3_len;
174 /* Increment by 1 when IP csum is requested, as type 3 is IPv4 with csum */
175 w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);
178 w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
180 /* When there is no tunnel header, shift the
181 * IL3/IL4 fields down so that OL3/OL4 carry
182 * the header checksum information
185 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
186 ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));
188 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
189 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
190 const uint8_t outer_l2_len = m->outer_l2_len;
193 w1.ol3ptr = outer_l2_len;
194 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
195 /* Increment by 1 when IP csum is requested, as type 3 is IPv4 with csum */
196 w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
197 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
198 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
201 w1.ol4type = csum + (csum << 1);
203 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
204 const uint8_t l2_len = m->l2_len;
206 /* Always use OLXPTR and OLXTYPE when
207 * only one header is present
212 w1.ol4ptr = l2_len + m->l3_len;
213 /* Increment by 1 when IP csum is requested, as type 3 is IPv4 with csum */
214 w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
215 ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
216 !!(ol_flags & PKT_TX_IP_CKSUM);
219 w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
222 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
223 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
224 /* HW will update ptr after vlan0 update */
225 send_hdr_ext->w1.vlan1_ins_ptr = 12;
226 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
228 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
229 /* 2B before end of l2 header */
230 send_hdr_ext->w1.vlan0_ins_ptr = 12;
231 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
234 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
238 mask = -(!w1.il3type);
239 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
241 send_hdr_ext->w0.lso_sb = lso_sb;
242 send_hdr_ext->w0.lso = 1;
243 send_hdr_ext->w0.lso_mps = m->tso_segsz;
244 send_hdr_ext->w0.lso_format =
245 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
246 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
248 /* Handle tunnel tso */
249 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
250 (ol_flags & PKT_TX_TUNNEL_MASK)) {
251 const uint8_t is_udp_tun =
252 (CNXK_NIX_UDP_TUN_BITMASK >>
253 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
255 uint8_t shift = is_udp_tun ? 32 : 0;
257 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
258 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
260 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
261 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
262 /* Update format for UDP tunneled packet */
263 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
267 if (flags & NIX_TX_NEED_SEND_HDR_W1)
268 send_hdr->w1.u = w1.u;
270 if (!(flags & NIX_TX_MULTI_SEG_F)) {
271 sg->seg1_size = m->data_len;
272 *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);
274 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
275 /* DF bit = 1 if refcount of current mbuf or parent mbuf is > 1,
277 * DF bit = 0 otherwise
279 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
280 /* Ensuring mbuf fields which got updated in
281 * cnxk_nix_prefree_seg are written before LMTST.
285 /* Mark mempool object as "put" since it is freed by NIX */
286 if (!send_hdr->w0.df)
287 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
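/* Resulting command layout prepared above: with an extension header,
 * cmd[0..1] = SEND_HDR_S, cmd[2..3] = SEND_EXT_S and cmd[4..5] = SG;
 * without it, the SG sub-descriptor starts directly at cmd[2].
 */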
291 static __rte_always_inline void
292 cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
293 const uint64_t ol_flags, const uint16_t no_segdw,
294 const uint16_t flags)
296 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
297 struct nix_send_mem_s *send_mem;
298 uint16_t off = (no_segdw - 1) << 1;
299 const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
301 send_mem = (struct nix_send_mem_s *)(cmd + off);
302 if (flags & NIX_TX_MULTI_SEG_F) {
303 /* Retrieving the default desc values */
304 cmd[off] = send_mem_desc[6];
306 /* Use a compiler barrier to avoid violation of C
309 rte_compiler_barrier();
312 /* For packets without PKT_TX_IEEE1588_TMST set, the Tx tstamp
313 * should not be recorded; hence change the alg type to
314 * NIX_SENDMEMALG_SET and move the send mem addr field to the
315 * next 8 bytes so it does not corrupt the actual Tx tstamp
318 send_mem->w0.cn9k.alg =
319 NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
321 send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
326 static __rte_always_inline void
327 cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
328 const uint32_t flags)
333 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
334 lmt_status = roc_lmt_submit_ldeor(io_addr);
335 } while (lmt_status == 0);
338 static __rte_always_inline void
339 cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
341 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
344 static __rte_always_inline uint64_t
345 cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
347 return roc_lmt_submit_ldeor(io_addr);
350 static __rte_always_inline uint64_t
351 cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
353 return roc_lmt_submit_ldeorl(io_addr);
356 static __rte_always_inline uint16_t
357 cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
359 struct nix_send_hdr_s *send_hdr;
360 union nix_send_sg_s *sg;
361 struct rte_mbuf *m_next;
362 uint64_t *slist, sg_u;
367 send_hdr = (struct nix_send_hdr_s *)cmd;
368 send_hdr->w0.total = m->pkt_len;
369 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
371 if (flags & NIX_TX_NEED_EXT_HDR)
376 sg = (union nix_send_sg_s *)&cmd[2 + off];
377 /* Clear sg->u header before use */
378 sg->u &= 0xFC00000000000000;
380 slist = &cmd[3 + off];
383 nb_segs = m->nb_segs;
385 /* Fill mbuf segments */
388 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
389 *slist = rte_mbuf_data_iova(m);
390 /* Set invert df if buffer is not to be freed by H/W */
391 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
392 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
393 /* Commit changes to mbuf */
396 /* Mark mempool object as "put" since it is freed by NIX */
397 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
398 if (!(sg_u & (1ULL << (i + 55))))
399 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
405 if (i > 2 && nb_segs) {
407 /* Next SG subdesc */
408 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
411 sg = (union nix_send_sg_s *)slist;
420 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
421 /* Round up extra dwords to a multiple of 2 */
422 segdw = (segdw >> 1) + (segdw & 0x1);
424 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
425 send_hdr->w0.sizem1 = segdw - 1;
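/* Sketch of the dword accounting above (assumed: a 3-segment chain, no
 * ext header, no tstamp): the SG area spans 1 SG word + 3 iova words =
 * 4 64-bit words, i.e. 2 dwords after rounding; adding 1 dword for
 * SEND_HDR_S gives segdw = 3, hence sizem1 = 2.
 */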
430 static __rte_always_inline void
431 cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
433 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
436 static __rte_always_inline void
437 cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
443 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
444 lmt_status = roc_lmt_submit_ldeor(io_addr);
445 } while (lmt_status == 0);
448 static __rte_always_inline void
449 cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
450 rte_iova_t io_addr, uint16_t segdw)
456 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
457 lmt_status = roc_lmt_submit_ldeor(io_addr);
458 } while (lmt_status == 0);
461 static __rte_always_inline uint16_t
462 cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
463 uint64_t *cmd, const uint16_t flags)
465 struct cn9k_eth_txq *txq = tx_queue;
466 const rte_iova_t io_addr = txq->io_addr;
467 void *lmt_addr = txq->lmt_addr;
468 uint64_t lso_tun_fmt;
471 NIX_XMIT_FC_OR_RETURN(txq, pkts);
473 roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
475 /* Perform header writes before barrier for TSO */
476 if (flags & NIX_TX_OFFLOAD_TSO_F) {
477 lso_tun_fmt = txq->lso_tun_fmt;
479 for (i = 0; i < pkts; i++)
480 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
483 /* Commit any changes in the packet here, as no further changes
484 * to the packet will be made unless "no fast free" is enabled.
486 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
489 for (i = 0; i < pkts; i++) {
490 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
491 cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
492 tx_pkts[i]->ol_flags, 4, flags);
493 cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
496 /* Reduce the cached count */
497 txq->fc_cache_pkts -= pkts;
502 static __rte_always_inline uint16_t
503 cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
504 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
506 struct cn9k_eth_txq *txq = tx_queue;
507 const rte_iova_t io_addr = txq->io_addr;
508 void *lmt_addr = txq->lmt_addr;
509 uint64_t lso_tun_fmt;
513 NIX_XMIT_FC_OR_RETURN(txq, pkts);
515 roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
517 /* Perform header writes before barrier for TSO */
518 if (flags & NIX_TX_OFFLOAD_TSO_F) {
519 lso_tun_fmt = txq->lso_tun_fmt;
521 for (i = 0; i < pkts; i++)
522 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
525 /* Commit any changes in the packet here, as no further changes
526 * to the packet will be made unless "no fast free" is enabled.
528 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
531 for (i = 0; i < pkts; i++) {
532 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
533 segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
534 cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
535 tx_pkts[i]->ol_flags, segdw,
537 cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
540 /* Reduce the cached count */
541 txq->fc_cache_pkts -= pkts;
546 #if defined(RTE_ARCH_ARM64)
548 static __rte_always_inline void
549 cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
550 union nix_send_ext_w0_u *w0, uint64_t ol_flags,
556 if (!(ol_flags & PKT_TX_TCP_SEG))
559 mask = -(!w1->il3type);
560 lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
564 w0->lso_mps = m->tso_segsz;
565 w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
566 w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
568 /* Handle tunnel tso */
569 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
570 (ol_flags & PKT_TX_TUNNEL_MASK)) {
571 const uint8_t is_udp_tun =
572 (CNXK_NIX_UDP_TUN_BITMASK >>
573 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
576 w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
577 w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
578 /* Update format for UDP tunneled packet */
579 w0->lso_format += is_udp_tun ? 2 : 6;
581 w0->lso_format += !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
585 static __rte_always_inline uint8_t
586 cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
587 union nix_send_hdr_w0_u *sh,
588 union nix_send_sg_s *sg, const uint32_t flags)
590 struct rte_mbuf *m_next;
591 uint64_t *slist, sg_u;
596 sh->total = m->pkt_len;
597 /* Clear sg->u header before use */
598 sg->u &= 0xFC00000000000000;
602 sg_u = sg_u | ((uint64_t)m->data_len);
604 nb_segs = m->nb_segs - 1;
607 /* Set invert df if buffer is not to be freed by H/W */
608 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
609 sg_u |= (cnxk_nix_prefree_seg(m) << 55);
610 /* Mark mempool object as "put" since it is freed by NIX */
611 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
612 if (!(sg_u & (1ULL << 55)))
613 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
618 /* Fill mbuf segments */
621 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
622 *slist = rte_mbuf_data_iova(m);
623 /* Set invert df if buffer is not to be freed by H/W */
624 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
625 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
626 /* Mark mempool object as "put" since it is freed by NIX
628 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
629 if (!(sg_u & (1ULL << (i + 55))))
630 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
636 if (i > 2 && nb_segs) {
638 /* Next SG subdesc */
639 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
642 sg = (union nix_send_sg_s *)slist;
651 segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
654 /* Round up extra dwords to a multiple of 2 */
655 segdw = (segdw >> 1) + (segdw & 0x1);
657 segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
658 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
659 sh->sizem1 = segdw - 1;
664 static __rte_always_inline uint8_t
665 cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
666 uint64x2_t *cmd1, const uint32_t flags)
668 union nix_send_hdr_w0_u sh;
669 union nix_send_sg_s sg;
672 if (m->nb_segs == 1) {
673 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
674 sg.u = vgetq_lane_u64(cmd1[0], 0);
675 sg.u |= (cnxk_nix_prefree_seg(m) << 55);
676 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
679 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
680 sg.u = vgetq_lane_u64(cmd1[0], 0);
681 if (!(sg.u & (1ULL << 55)))
682 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
685 return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
686 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
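/* The value returned above is the dword count of the final command;
 * e.g. (illustrative) with both an ext header and a tstamp sub-desc a
 * single-segment packet occupies 2 + 1 + 1 = 4 dwords.
 */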
689 sh.u = vgetq_lane_u64(cmd0[0], 0);
690 sg.u = vgetq_lane_u64(cmd1[0], 0);
692 ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
694 cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
695 cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
699 #define NIX_DESCS_PER_LOOP 4
701 static __rte_always_inline void
702 cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
703 uint64x2_t *cmd2, uint64x2_t *cmd3,
705 uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
706 uint64_t *lmt_addr, rte_iova_t io_addr,
707 const uint32_t flags)
712 if (!(flags & NIX_TX_NEED_EXT_HDR) &&
713 !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
714 /* All 4 consecutive packets are single-segment (2 dwords each). */
715 if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
717 vst1q_u64(lmt_addr, cmd0[0]);
718 vst1q_u64(lmt_addr + 2, cmd1[0]);
719 vst1q_u64(lmt_addr + 4, cmd0[1]);
720 vst1q_u64(lmt_addr + 6, cmd1[1]);
721 vst1q_u64(lmt_addr + 8, cmd0[2]);
722 vst1q_u64(lmt_addr + 10, cmd1[2]);
723 vst1q_u64(lmt_addr + 12, cmd0[3]);
724 vst1q_u64(lmt_addr + 14, cmd1[3]);
725 lmt_status = roc_lmt_submit_ldeor(io_addr);
726 } while (lmt_status == 0);
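/* The fast case above works because each command is exactly 2 dwords
 * (SEND_HDR_S + SG), so 4 packets consume 8 dwords, i.e. one full
 * 128-byte LMTLINE written back-to-back and submitted with a single
 * LDEOR.
 */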
732 for (j = 0; j < NIX_DESCS_PER_LOOP;) {
733 /* Fit two consecutive packets into the same LMTLINE. */
734 if ((segdw[j] + segdw[j + 1]) <= 8) {
736 if ((flags & NIX_TX_NEED_EXT_HDR) &&
737 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
738 vst1q_u64(lmt_addr, cmd0[j]);
739 vst1q_u64(lmt_addr + 2, cmd2[j]);
740 vst1q_u64(lmt_addr + 4, cmd1[j]);
743 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
745 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
747 vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
748 vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
749 vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
750 roc_lmt_mov_seg(lmt_addr + 14 + off,
751 slist[j + 1], segdw[j + 1] - 4);
752 off += ((segdw[j + 1] - 4) << 1);
753 vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
754 } else if (flags & NIX_TX_NEED_EXT_HDR) {
755 vst1q_u64(lmt_addr, cmd0[j]);
756 vst1q_u64(lmt_addr + 2, cmd2[j]);
757 vst1q_u64(lmt_addr + 4, cmd1[j]);
760 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
762 vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
763 vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
764 vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
765 roc_lmt_mov_seg(lmt_addr + 12 + off,
766 slist[j + 1], segdw[j + 1] - 3);
768 vst1q_u64(lmt_addr, cmd0[j]);
769 vst1q_u64(lmt_addr + 2, cmd1[j]);
772 roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
774 vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
775 vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
776 roc_lmt_mov_seg(lmt_addr + 8 + off,
777 slist[j + 1], segdw[j + 1] - 2);
779 lmt_status = roc_lmt_submit_ldeor(io_addr);
785 if ((flags & NIX_TX_NEED_EXT_HDR) &&
786 (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
787 vst1q_u64(lmt_addr, cmd0[j]);
788 vst1q_u64(lmt_addr + 2, cmd2[j]);
789 vst1q_u64(lmt_addr + 4, cmd1[j]);
792 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
794 vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
795 } else if (flags & NIX_TX_NEED_EXT_HDR) {
796 vst1q_u64(lmt_addr, cmd0[j]);
797 vst1q_u64(lmt_addr + 2, cmd2[j]);
798 vst1q_u64(lmt_addr + 4, cmd1[j]);
801 roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
803 vst1q_u64(lmt_addr, cmd0[j]);
804 vst1q_u64(lmt_addr + 2, cmd1[j]);
807 roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
809 lmt_status = roc_lmt_submit_ldeor(io_addr);
817 static __rte_always_inline uint16_t
818 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
819 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
821 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
822 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
823 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
824 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
825 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
826 uint64x2_t senddesc01_w0, senddesc23_w0;
827 uint64x2_t senddesc01_w1, senddesc23_w1;
828 uint64x2_t sendext01_w0, sendext23_w0;
829 uint64x2_t sendext01_w1, sendext23_w1;
830 uint64x2_t sendmem01_w0, sendmem23_w0;
831 uint64x2_t sendmem01_w1, sendmem23_w1;
832 uint64x2_t sgdesc01_w0, sgdesc23_w0;
833 uint64x2_t sgdesc01_w1, sgdesc23_w1;
834 struct cn9k_eth_txq *txq = tx_queue;
835 uint64_t *lmt_addr = txq->lmt_addr;
836 rte_iova_t io_addr = txq->io_addr;
837 uint64x2_t ltypes01, ltypes23;
838 uint64x2_t xtmp128, ytmp128;
839 uint64x2_t xmask01, xmask23;
840 uint64_t lmt_status, i;
843 NIX_XMIT_FC_OR_RETURN(txq, pkts);
845 pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
846 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
848 /* Reduce the cached count */
849 txq->fc_cache_pkts -= pkts;
851 /* Perform header writes before barrier for TSO */
852 if (flags & NIX_TX_OFFLOAD_TSO_F) {
853 for (i = 0; i < pkts; i++)
854 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
857 /* Commit any changes in the packet here, as no further changes
858 * to the packet will be made unless "no fast free" is enabled.
860 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
863 senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
864 senddesc23_w0 = senddesc01_w0;
865 senddesc01_w1 = vdupq_n_u64(0);
866 senddesc23_w1 = senddesc01_w1;
868 /* Load command defaults into vector variables. */
869 if (flags & NIX_TX_NEED_EXT_HDR) {
870 sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
871 sendext23_w0 = sendext01_w0;
872 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
873 sendext23_w1 = sendext01_w1;
874 sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
875 sgdesc23_w0 = sgdesc01_w0;
876 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
877 sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
878 sendmem23_w0 = sendmem01_w0;
879 sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
880 sendmem23_w1 = sendmem01_w1;
883 sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
884 sgdesc23_w0 = sgdesc01_w0;
887 for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
888 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
890 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
891 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
893 senddesc23_w0 = senddesc01_w0;
894 sgdesc23_w0 = sgdesc01_w0;
896 /* Clear vlan enables. */
897 if (flags & NIX_TX_NEED_EXT_HDR) {
898 sendext01_w1 = vbicq_u64(sendext01_w1,
899 vdupq_n_u64(0x3FFFF00FFFF00));
900 sendext23_w1 = sendext01_w1;
903 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
904 /* Reset send mem alg to SETTSTMP from SUB */
905 sendmem01_w0 = vbicq_u64(sendmem01_w0,
906 vdupq_n_u64(BIT_ULL(59)));
907 /* Reset send mem address to default. */
909 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
910 sendmem23_w0 = sendmem01_w0;
911 sendmem23_w1 = sendmem01_w1;
914 if (flags & NIX_TX_OFFLOAD_TSO_F) {
915 /* Clear the LSO enable bit. */
916 sendext01_w0 = vbicq_u64(sendext01_w0,
917 vdupq_n_u64(BIT_ULL(14)));
918 sendext23_w0 = sendext01_w0;
921 /* Move mbufs to iova */
922 mbuf0 = (uint64_t *)tx_pkts[0];
923 mbuf1 = (uint64_t *)tx_pkts[1];
924 mbuf2 = (uint64_t *)tx_pkts[2];
925 mbuf3 = (uint64_t *)tx_pkts[3];
927 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
928 offsetof(struct rte_mbuf, buf_iova));
929 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
930 offsetof(struct rte_mbuf, buf_iova));
931 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
932 offsetof(struct rte_mbuf, buf_iova));
933 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
934 offsetof(struct rte_mbuf, buf_iova));
936 * Get the mbufs' ol_flags, iova, pktlen and dataoff:
937 * dataoff_iovaX.D[0] = iova,
938 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
939 * len_olflagsX.D[0] = ol_flags,
940 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
942 dataoff_iova0 = vld1q_u64(mbuf0);
943 len_olflags0 = vld1q_u64(mbuf0 + 2);
944 dataoff_iova1 = vld1q_u64(mbuf1);
945 len_olflags1 = vld1q_u64(mbuf1 + 2);
946 dataoff_iova2 = vld1q_u64(mbuf2);
947 len_olflags2 = vld1q_u64(mbuf2 + 2);
948 dataoff_iova3 = vld1q_u64(mbuf3);
949 len_olflags3 = vld1q_u64(mbuf3 + 2);
951 /* Move mbuf pointers to point to the pool field */
952 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
953 offsetof(struct rte_mbuf, pool) -
954 offsetof(struct rte_mbuf, buf_iova));
955 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
956 offsetof(struct rte_mbuf, pool) -
957 offsetof(struct rte_mbuf, buf_iova));
958 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
959 offsetof(struct rte_mbuf, pool) -
960 offsetof(struct rte_mbuf, buf_iova));
961 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
962 offsetof(struct rte_mbuf, pool) -
963 offsetof(struct rte_mbuf, buf_iova));
965 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
966 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
967 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
969 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
970 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
973 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
974 : [a] "+w"(senddesc01_w1)
975 : [in] "r"(mbuf0 + 2)
978 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
979 : [a] "+w"(senddesc01_w1)
980 : [in] "r"(mbuf1 + 2)
983 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
984 : [b] "+w"(senddesc23_w1)
985 : [in] "r"(mbuf2 + 2)
988 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
989 : [b] "+w"(senddesc23_w1)
990 : [in] "r"(mbuf3 + 2)
993 /* Get pool pointer alone */
994 mbuf0 = (uint64_t *)*mbuf0;
995 mbuf1 = (uint64_t *)*mbuf1;
996 mbuf2 = (uint64_t *)*mbuf2;
997 mbuf3 = (uint64_t *)*mbuf3;
999 /* Get pool pointer alone */
1000 mbuf0 = (uint64_t *)*mbuf0;
1001 mbuf1 = (uint64_t *)*mbuf1;
1002 mbuf2 = (uint64_t *)*mbuf2;
1003 mbuf3 = (uint64_t *)*mbuf3;
1006 const uint8x16_t shuf_mask2 = {
1007 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1008 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1010 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1011 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1013 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1014 const uint64x2_t and_mask0 = {
1019 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1020 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1021 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1022 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1025 * Pick only 16 bits of pktlen present at bits 63:32
1026 * and place them at bits 15:0.
1028 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1029 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1031 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1032 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1033 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1035 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1036 * pktlen at 15:0 position.
1038 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1039 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1040 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1041 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1043 /* Move mbuf to point to pool_id. */
1044 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1045 offsetof(struct rte_mempool, pool_id));
1046 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1047 offsetof(struct rte_mempool, pool_id));
1048 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1049 offsetof(struct rte_mempool, pool_id));
1050 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1051 offsetof(struct rte_mempool, pool_id));
1053 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1054 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1056 * Lookup table to translate ol_flags to
1057 * il3/il4 types. But we still use ol3/ol4 types in
1058 * senddesc_w1 as only one header processing is enabled.
1060 const uint8x16_t tbl = {
1061 /* [0-15] = il4type:il3type */
1062 0x04, /* none (IPv6 assumed) */
1063 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
1064 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
1065 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
1066 0x03, /* PKT_TX_IP_CKSUM */
1067 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
1068 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
1069 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
1070 0x02, /* PKT_TX_IPV4 */
1071 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
1072 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
1073 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
1074 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
1075 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1078 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1081 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1086 /* Extract olflags to translate to iltypes */
1087 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1088 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1091 * E(47):L3_LEN(9):L2_LEN(7+z)
1092 * E(47):L3_LEN(9):L2_LEN(7+z)
1094 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1095 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1097 /* Move OLFLAGS bits 55:52 to 51:48
1098 * with zeros prepended on the byte and rest
1101 xtmp128 = vshrq_n_u8(xtmp128, 4);
1102 ytmp128 = vshrq_n_u8(ytmp128, 4);
1104 * E(48):L3_LEN(8):L2_LEN(z+7)
1105 * E(48):L3_LEN(8):L2_LEN(z+7)
1107 const int8x16_t tshft3 = {
1108 -1, 0, 8, 8, 8, 8, 8, 8,
1109 -1, 0, 8, 8, 8, 8, 8, 8,
1112 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1113 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1116 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1117 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1119 /* Pick only the relevant fields, i.e. bits 48:55 of iltype,
1120 * and place them in ol3/ol4type of senddesc_w1
1122 const uint8x16_t shuf_mask0 = {
1123 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1124 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1127 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1128 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1130 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1131 * a [E(32):E(16):OL3(8):OL2(8)]
1133 * a [E(32):E(16):(OL3+OL2):OL2]
1134 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1136 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1137 vshlq_n_u16(senddesc01_w1, 8));
1138 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1139 vshlq_n_u16(senddesc23_w1, 8));
1141 /* Move ltypes to senddesc*_w1 */
1142 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1143 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
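/* Worked lookup (values straight from the table above): ol_flags of
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM selects entry 5 = 0x13, i.e.
 * il4type 1 (TCP csum) and il3type 3 (IPv4 with csum), stored into the
 * ol3/ol4type fields since only one header is processed here.
 */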
1144 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1145 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1147 * Lookup table to translate ol_flags to
1151 const uint8x16_t tbl = {
1152 /* [0-15] = ol4type:ol3type */
1154 0x03, /* OUTER_IP_CKSUM */
1155 0x02, /* OUTER_IPV4 */
1156 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1157 0x04, /* OUTER_IPV6 */
1158 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1159 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1160 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1163 0x00, /* OUTER_UDP_CKSUM */
1164 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1165 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1166 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1169 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1170 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1173 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1176 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1177 * OUTER_IPV4 | OUTER_IP_CKSUM
1181 /* Extract olflags to translate to iltypes */
1182 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1183 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1186 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1187 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1189 const uint8x16_t shuf_mask5 = {
1190 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1191 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1193 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1194 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1196 /* Extract outer ol flags only */
1197 const uint64x2_t o_cksum_mask = {
1202 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1203 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1205 /* Extract OUTER_UDP_CKSUM bit 41 and
1209 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1210 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1212 /* Shift oltype by 2 to start nibble from BIT(56)
1213 * instead of BIT(58)
1215 xtmp128 = vshrq_n_u8(xtmp128, 2);
1216 ytmp128 = vshrq_n_u8(ytmp128, 2);
1218 * E(48):L3_LEN(8):L2_LEN(z+7)
1219 * E(48):L3_LEN(8):L2_LEN(z+7)
1221 const int8x16_t tshft3 = {
1222 -1, 0, 8, 8, 8, 8, 8, 8,
1223 -1, 0, 8, 8, 8, 8, 8, 8,
1226 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1227 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1230 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1231 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1233 /* Pick only the relevant fields, i.e. bits 56:63 of oltype,
1234 * and place them in ol3/ol4type of senddesc_w1
1236 const uint8x16_t shuf_mask0 = {
1237 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1238 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1241 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1242 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1244 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1245 * a [E(32):E(16):OL3(8):OL2(8)]
1247 * a [E(32):E(16):(OL3+OL2):OL2]
1248 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1250 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1251 vshlq_n_u16(senddesc01_w1, 8));
1252 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1253 vshlq_n_u16(senddesc23_w1, 8));
1255 /* Move ltypes to senddesc*_w1 */
1256 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1257 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1258 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1259 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1260 /* Lookup table to translate ol_flags to
1261 * ol4type, ol3type, il4type, il3type of senddesc_w1
1263 const uint8x16x2_t tbl = {{
1265 /* [0-15] = il4type:il3type */
1266 0x04, /* none (IPv6) */
1267 0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
1268 0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
1269 0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
1270 0x03, /* PKT_TX_IP_CKSUM */
1271 0x13, /* PKT_TX_IP_CKSUM |
1274 0x23, /* PKT_TX_IP_CKSUM |
1277 0x33, /* PKT_TX_IP_CKSUM |
1280 0x02, /* PKT_TX_IPV4 */
1281 0x12, /* PKT_TX_IPV4 |
1284 0x22, /* PKT_TX_IPV4 |
1287 0x32, /* PKT_TX_IPV4 |
1290 0x03, /* PKT_TX_IPV4 |
1293 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1296 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1299 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1305 /* [16-31] = ol4type:ol3type */
1307 0x03, /* OUTER_IP_CKSUM */
1308 0x02, /* OUTER_IPV4 */
1309 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1310 0x04, /* OUTER_IPV6 */
1311 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1312 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1313 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1316 0x00, /* OUTER_UDP_CKSUM */
1317 0x33, /* OUTER_UDP_CKSUM |
1320 0x32, /* OUTER_UDP_CKSUM |
1323 0x33, /* OUTER_UDP_CKSUM |
1324 * OUTER_IPV4 | OUTER_IP_CKSUM
1326 0x34, /* OUTER_UDP_CKSUM |
1329 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1332 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1335 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1336 * OUTER_IPV4 | OUTER_IP_CKSUM
1341 /* Extract olflags to translate to oltype & iltype */
1342 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1343 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1346 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1347 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1349 const uint32x4_t tshft_4 = {
1355 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1356 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1359 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1360 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1362 const uint8x16_t shuf_mask5 = {
1363 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1364 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1366 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1367 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1369 /* Extract outer and inner header ol_flags */
1370 const uint64x2_t oi_cksum_mask = {
1375 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1376 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1378 /* Extract OUTER_UDP_CKSUM bit 41 and
1382 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1383 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1385 /* Shift right oltype by 2 and iltype by 4
1386 * to start the oltype nibble from BIT(56)
1387 * instead of BIT(58) and the iltype nibble from BIT(48)
1388 * instead of BIT(52).
1390 const int8x16_t tshft5 = {
1391 8, 8, 8, 8, 8, 8, -4, -2,
1392 8, 8, 8, 8, 8, 8, -4, -2,
1395 xtmp128 = vshlq_u8(xtmp128, tshft5);
1396 ytmp128 = vshlq_u8(ytmp128, tshft5);
1398 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1399 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1401 const int8x16_t tshft3 = {
1402 -1, 0, -1, 0, 0, 0, 0, 0,
1403 -1, 0, -1, 0, 0, 0, 0, 0,
1406 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1407 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1409 /* Mark Bit(4) of oltype */
1410 const uint64x2_t oi_cksum_mask2 = {
1415 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1416 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1419 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1420 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1422 /* Pick only the relevant fields, i.e. bits 48:55 of iltype and
1423 * bits 56:63 of oltype, and place them in the corresponding
1424 * fields of senddesc_w1.
1426 const uint8x16_t shuf_mask0 = {
1427 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1428 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1431 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1432 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1434 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1435 * l3len, l2len, ol3len, ol2len.
1436 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1438 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1440 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1441 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1443 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1444 vshlq_n_u32(senddesc01_w1, 8));
1445 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1446 vshlq_n_u32(senddesc23_w1, 8));
1448 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1449 senddesc01_w1 = vaddq_u8(
1450 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1451 senddesc23_w1 = vaddq_u8(
1452 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
1454 /* Move ltypes to senddesc*_w1 */
1455 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1456 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1459 xmask01 = vdupq_n_u64(0);
1461 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1466 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1471 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1476 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1480 xmask01 = vshlq_n_u64(xmask01, 20);
1481 xmask23 = vshlq_n_u64(xmask23, 20);
1483 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1484 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1486 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1487 /* Tx ol_flag for vlan. */
1488 const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
1489 /* Bit enable for VLAN1 */
1490 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1491 /* Tx ol_flag for QnQ. */
1492 const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
1493 /* Bit enable for VLAN0 */
1494 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1495 /* Load vlan values from packet. outer is VLAN 0 */
1496 uint64x2_t ext01 = {
1497 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1498 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1499 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1500 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1502 uint64x2_t ext23 = {
1503 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1504 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1505 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1506 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1509 /* Get ol_flags of the packets. */
1510 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1511 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1513 /* ORR vlan outer/inner values into cmd. */
1514 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1515 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1517 /* Test for offload enable bits and generate masks. */
1518 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1520 vandq_u64(vtstq_u64(xtmp128, olq),
1522 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1524 vandq_u64(vtstq_u64(ytmp128, olq),
1527 /* Set vlan enable bits into cmd based on mask. */
1528 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1529 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1532 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1533 /* Tx ol_flag for timestamp. */
1534 const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
1535 PKT_TX_IEEE1588_TMST};
1536 /* Set send mem alg to SUB. */
1537 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1538 /* Increment send mem address by 8. */
1539 const uint64x2_t addr = {0x8, 0x8};
1541 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1542 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1544 /* Check if timestamp is requested and generate inverted
1545 * mask as we need not make any changes to default cmd
1548 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1549 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1551 /* Change send mem address to an 8 byte offset when
1552 * TSTMP is disabled.
1554 sendmem01_w1 = vaddq_u64(sendmem01_w1,
1555 vandq_u64(xtmp128, addr));
1556 sendmem23_w1 = vaddq_u64(sendmem23_w1,
1557 vandq_u64(ytmp128, addr));
1558 /* Change send mem alg to SUB when TSTMP is disabled. */
1559 sendmem01_w0 = vorrq_u64(sendmem01_w0,
1560 vandq_u64(xtmp128, alg));
1561 sendmem23_w0 = vorrq_u64(sendmem23_w0,
1562 vandq_u64(ytmp128, alg));
1564 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1565 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1566 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1567 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1570 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1571 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1572 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1574 /* Extract SD W1 as we need to set L4 types. */
1575 vst1q_u64(sd_w1, senddesc01_w1);
1576 vst1q_u64(sd_w1 + 2, senddesc23_w1);
1578 /* Extract SX W0 as we need to set LSO fields. */
1579 vst1q_u64(sx_w0, sendext01_w0);
1580 vst1q_u64(sx_w0 + 2, sendext23_w0);
1582 /* Extract ol_flags. */
1583 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1584 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1586 /* Prepare individual mbufs. */
1587 cn9k_nix_prepare_tso(tx_pkts[0],
1588 (union nix_send_hdr_w1_u *)&sd_w1[0],
1589 (union nix_send_ext_w0_u *)&sx_w0[0],
1590 vgetq_lane_u64(xtmp128, 0), flags);
1592 cn9k_nix_prepare_tso(tx_pkts[1],
1593 (union nix_send_hdr_w1_u *)&sd_w1[1],
1594 (union nix_send_ext_w0_u *)&sx_w0[1],
1595 vgetq_lane_u64(xtmp128, 1), flags);
1597 cn9k_nix_prepare_tso(tx_pkts[2],
1598 (union nix_send_hdr_w1_u *)&sd_w1[2],
1599 (union nix_send_ext_w0_u *)&sx_w0[2],
1600 vgetq_lane_u64(ytmp128, 0), flags);
1602 cn9k_nix_prepare_tso(tx_pkts[3],
1603 (union nix_send_hdr_w1_u *)&sd_w1[3],
1604 (union nix_send_ext_w0_u *)&sx_w0[3],
1605 vgetq_lane_u64(ytmp128, 1), flags);
1607 senddesc01_w1 = vld1q_u64(sd_w1);
1608 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1610 sendext01_w0 = vld1q_u64(sx_w0);
1611 sendext23_w0 = vld1q_u64(sx_w0 + 2);
1614 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1615 !(flags & NIX_TX_MULTI_SEG_F)) {
1616 /* Set don't free bit if reference count > 1 */
1617 xmask01 = vdupq_n_u64(0);
1620 /* Move mbufs to iova */
1621 mbuf0 = (uint64_t *)tx_pkts[0];
1622 mbuf1 = (uint64_t *)tx_pkts[1];
1623 mbuf2 = (uint64_t *)tx_pkts[2];
1624 mbuf3 = (uint64_t *)tx_pkts[3];
1626 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1627 vsetq_lane_u64(0x80000, xmask01, 0);
1629 __mempool_check_cookies(
1630 ((struct rte_mbuf *)mbuf0)->pool,
1631 (void **)&mbuf0, 1, 0);
1633 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1634 vsetq_lane_u64(0x80000, xmask01, 1);
1636 __mempool_check_cookies(
1637 ((struct rte_mbuf *)mbuf1)->pool,
1638 (void **)&mbuf1, 1, 0);
1640 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1641 vsetq_lane_u64(0x80000, xmask23, 0);
1643 __mempool_check_cookies(
1644 ((struct rte_mbuf *)mbuf2)->pool,
1645 (void **)&mbuf2, 1, 0);
1647 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1648 vsetq_lane_u64(0x80000, xmask23, 1);
1650 __mempool_check_cookies(
1651 ((struct rte_mbuf *)mbuf3)->pool,
1652 (void **)&mbuf3, 1, 0);
1653 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1654 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1655 /* Ensuring mbuf fields which got updated in
1656 * cnxk_nix_prefree_seg are written before LMTST.
1659 } else if (!(flags & NIX_TX_MULTI_SEG_F)) {
1660 /* Move mbufs to iova */
1661 mbuf0 = (uint64_t *)tx_pkts[0];
1662 mbuf1 = (uint64_t *)tx_pkts[1];
1663 mbuf2 = (uint64_t *)tx_pkts[2];
1664 mbuf3 = (uint64_t *)tx_pkts[3];
1666 /* Mark mempool object as "put" since
1667 * it is freed by NIX
1669 __mempool_check_cookies(
1670 ((struct rte_mbuf *)mbuf0)->pool,
1671 (void **)&mbuf0, 1, 0);
1673 __mempool_check_cookies(
1674 ((struct rte_mbuf *)mbuf1)->pool,
1675 (void **)&mbuf1, 1, 0);
1677 __mempool_check_cookies(
1678 ((struct rte_mbuf *)mbuf2)->pool,
1679 (void **)&mbuf2, 1, 0);
1681 __mempool_check_cookies(
1682 ((struct rte_mbuf *)mbuf3)->pool,
1683 (void **)&mbuf3, 1, 0);
1684 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1689 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1690 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1691 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1692 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1693 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1695 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1696 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1697 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1698 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1700 if (flags & NIX_TX_NEED_EXT_HDR) {
1701 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1702 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1703 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1704 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1707 if (flags & NIX_TX_MULTI_SEG_F) {
1708 uint64_t seg_list[NIX_DESCS_PER_LOOP]
1709 [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
1710 uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
1712 /* Build mseg list for each packet individually. */
1713 for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1714 segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
1715 seg_list[j], &cmd0[j],
1719 /* Commit all changes to mbuf before LMTST. */
1720 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1723 cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
1727 } else if (flags & NIX_TX_NEED_EXT_HDR) {
1728 /* With the ext header in the command we can no longer send
1729 * all 4 packets together since an LMTLINE is 128 bytes.
1730 * Split and Tx twice.
1733 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1734 vst1q_u64(lmt_addr, cmd0[0]);
1735 vst1q_u64(lmt_addr + 2, cmd2[0]);
1736 vst1q_u64(lmt_addr + 4, cmd1[0]);
1737 vst1q_u64(lmt_addr + 6, cmd3[0]);
1738 vst1q_u64(lmt_addr + 8, cmd0[1]);
1739 vst1q_u64(lmt_addr + 10, cmd2[1]);
1740 vst1q_u64(lmt_addr + 12, cmd1[1]);
1741 vst1q_u64(lmt_addr + 14, cmd3[1]);
1743 vst1q_u64(lmt_addr, cmd0[0]);
1744 vst1q_u64(lmt_addr + 2, cmd2[0]);
1745 vst1q_u64(lmt_addr + 4, cmd1[0]);
1746 vst1q_u64(lmt_addr + 6, cmd0[1]);
1747 vst1q_u64(lmt_addr + 8, cmd2[1]);
1748 vst1q_u64(lmt_addr + 10, cmd1[1]);
1750 lmt_status = roc_lmt_submit_ldeor(io_addr);
1751 } while (lmt_status == 0);
1754 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1755 vst1q_u64(lmt_addr, cmd0[2]);
1756 vst1q_u64(lmt_addr + 2, cmd2[2]);
1757 vst1q_u64(lmt_addr + 4, cmd1[2]);
1758 vst1q_u64(lmt_addr + 6, cmd3[2]);
1759 vst1q_u64(lmt_addr + 8, cmd0[3]);
1760 vst1q_u64(lmt_addr + 10, cmd2[3]);
1761 vst1q_u64(lmt_addr + 12, cmd1[3]);
1762 vst1q_u64(lmt_addr + 14, cmd3[3]);
1764 vst1q_u64(lmt_addr, cmd0[2]);
1765 vst1q_u64(lmt_addr + 2, cmd2[2]);
1766 vst1q_u64(lmt_addr + 4, cmd1[2]);
1767 vst1q_u64(lmt_addr + 6, cmd0[3]);
1768 vst1q_u64(lmt_addr + 8, cmd2[3]);
1769 vst1q_u64(lmt_addr + 10, cmd1[3]);
1771 lmt_status = roc_lmt_submit_ldeor(io_addr);
1772 } while (lmt_status == 0);
1775 vst1q_u64(lmt_addr, cmd0[0]);
1776 vst1q_u64(lmt_addr + 2, cmd1[0]);
1777 vst1q_u64(lmt_addr + 4, cmd0[1]);
1778 vst1q_u64(lmt_addr + 6, cmd1[1]);
1779 vst1q_u64(lmt_addr + 8, cmd0[2]);
1780 vst1q_u64(lmt_addr + 10, cmd1[2]);
1781 vst1q_u64(lmt_addr + 12, cmd0[3]);
1782 vst1q_u64(lmt_addr + 14, cmd1[3]);
1783 lmt_status = roc_lmt_submit_ldeor(io_addr);
1784 } while (lmt_status == 0);
1786 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1789 if (unlikely(pkts_left)) {
1790 if (flags & NIX_TX_MULTI_SEG_F)
1791 pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
1792 pkts_left, cmd, flags);
1794 pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
1802 static __rte_always_inline uint16_t
1803 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1804 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1806 RTE_SET_USED(tx_queue);
1807 RTE_SET_USED(tx_pkts);
1810 RTE_SET_USED(flags);
1815 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
1816 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
1817 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
1818 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
1819 #define TSO_F NIX_TX_OFFLOAD_TSO_F
1820 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
1822 /* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
1823 #define NIX_TX_FASTPATH_MODES \
1824 T(no_offload, 0, 0, 0, 0, 0, 0, 4, \
1825 NIX_TX_OFFLOAD_NONE) \
1826 T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, \
1828 T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, \
1830 T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, \
1831 OL3OL4CSUM_F | L3L4CSUM_F) \
1832 T(vlan, 0, 0, 0, 1, 0, 0, 6, \
1834 T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, \
1835 VLAN_F | L3L4CSUM_F) \
1836 T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, \
1837 VLAN_F | OL3OL4CSUM_F) \
1838 T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6, \
1839 VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1840 T(noff, 0, 0, 1, 0, 0, 0, 4, \
1842 T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, \
1843 NOFF_F | L3L4CSUM_F) \
1844 T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, \
1845 NOFF_F | OL3OL4CSUM_F) \
1846 T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4, \
1847 NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1848 T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, \
1850 T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, \
1851 NOFF_F | VLAN_F | L3L4CSUM_F) \
1852 T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, \
1853 NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1854 T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6, \
1855 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1856 T(tso, 0, 1, 0, 0, 0, 0, 6, \
1858 T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, \
1859 TSO_F | L3L4CSUM_F) \
1860 T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, \
1861 TSO_F | OL3OL4CSUM_F) \
1862 T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6, \
1863 TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1864 T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, \
1866 T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, \
1867 TSO_F | VLAN_F | L3L4CSUM_F) \
1868 T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, \
1869 TSO_F | VLAN_F | OL3OL4CSUM_F) \
1870 T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6, \
1871 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1872 T(tso_noff, 0, 1, 1, 0, 0, 0, 6, \
1874 T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, \
1875 TSO_F | NOFF_F | L3L4CSUM_F) \
1876 T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, \
1877 TSO_F | NOFF_F | OL3OL4CSUM_F) \
1878 T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6, \
1879 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1880 T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, \
1881 TSO_F | NOFF_F | VLAN_F) \
1882 T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6, \
1883 TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1884 T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6, \
1885 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1886 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6, \
1887 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1888 T(ts, 1, 0, 0, 0, 0, 0, 8, \
1890 T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, \
1891 TSP_F | L3L4CSUM_F) \
1892 T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, \
1893 TSP_F | OL3OL4CSUM_F) \
1894 T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8, \
1895 TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1896 T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, \
1898 T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, \
1899 TSP_F | VLAN_F | L3L4CSUM_F) \
1900 T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, \
1901 TSP_F | VLAN_F | OL3OL4CSUM_F) \
1902 T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8, \
1903 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1904 T(ts_noff, 1, 0, 1, 0, 0, 0, 8, \
1906 T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, \
1907 TSP_F | NOFF_F | L3L4CSUM_F) \
1908 T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, \
1909 TSP_F | NOFF_F | OL3OL4CSUM_F) \
1910 T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8, \
1911 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1912 T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, \
1913 TSP_F | NOFF_F | VLAN_F) \
1914 T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8, \
1915 TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1916 T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8, \
1917 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1918 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8, \
1919 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1920 T(ts_tso, 1, 1, 0, 0, 0, 0, 8, \
1922 T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, \
1923 TSP_F | TSO_F | L3L4CSUM_F) \
1924 T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, \
1925 TSP_F | TSO_F | OL3OL4CSUM_F) \
1926 T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8, \
1927 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1928 T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, \
1929 TSP_F | TSO_F | VLAN_F) \
1930 T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8, \
1931 TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
1932 T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8, \
1933 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
1934 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8, \
1935 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1936 T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, \
1937 TSP_F | TSO_F | NOFF_F) \
1938 T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8, \
1939 TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
1940 T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8, \
1941 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
1942 T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8, \
1943 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
1944 T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, \
1945 TSP_F | TSO_F | NOFF_F | VLAN_F) \
1946 T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8, \
1947 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
1948 T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8, \
1949 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
1950 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
1951 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
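/* Each row above encodes the [TSP][TSO][NOFF][VLAN][OL3OL4CSUM]
 * [L3L4CSUM] bit columns, a size column (4, 6 or 8; presumably the
 * command buffer size in 64-bit words, growing with the ext header and
 * tstamp sub-descriptors) and the flag mask used by the templated burst
 * functions declared below.
 */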
1953 #define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
1954 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name( \
1955 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1957 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name( \
1958 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1960 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
1961 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
1963 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \
1964 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
1966 NIX_TX_FASTPATH_MODES
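/* Illustrative usage (hypothetical snippet, not part of this header):
 * the expansion above declares one burst function per mode, which a
 * driver can install as its Tx burst callback, e.g.
 *
 *   eth_tx_burst_t burst = cn9k_nix_xmit_pkts_vlan_l3l4csum;
 *   uint16_t sent = burst(txq, pkts, nb_pkts);
 */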
1969 #endif /* __CN9K_TX_H__ */