/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef __CN9K_TX_H__
#define __CN9K_TX_H__

#include <rte_vect.h>
#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
/* Flags to control xmit_prepare function.
 * Defined from the MSB end so that they are not
 * mistaken for offload flags when picking a function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)
#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |        \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |               \
	 NIX_TX_OFFLOAD_TSO_F)
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, Update the fc_cache_pkts */            \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqe_per_sqb to express in pkts */     \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
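
/*
 * Usage sketch (illustrative only, not part of the driver): a burst
 * function is expected to reserve flow-control credit up front and
 * charge the cached count once the packets are submitted, e.g.:
 *
 *	uint16_t
 *	example_burst(struct cn9k_eth_txq *txq, uint16_t pkts)
 *	{
 *		NIX_XMIT_FC_OR_RETURN(txq, pkts); // may return 0 on no room
 *		// ... build descriptors and issue LMTSTs ...
 *		txq->fc_cache_pkts -= pkts;
 *		return pkts;
 *	}
 */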
/* Function to determine the number of Tx subdescriptors required
 * when the extended subdescriptor is enabled.
 */
static __rte_always_inline int
cn9k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
		       2 :
		       ((flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F |
				  NIX_TX_OFFLOAD_TSO_F)) ? 1 : 0);
}
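
/*
 * Resulting extra subdescriptor counts (read off the logic above):
 *   NIX_TX_OFFLOAD_TSTAMP_F set             -> 2 (SEND_EXT + SEND_MEM)
 *   NIX_TX_OFFLOAD_VLAN_QINQ_F or TSO_F set -> 1 (SEND_EXT)
 *   otherwise                               -> 0 (SEND_HDR + SG only)
 */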
static __rte_always_inline void
cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & PKT_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       PKT_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata +
						       m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & PKT_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
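
/*
 * Worked example (illustrative numbers): for a plain TCP segment with
 * l2_len = 14, l3_len = 20, l4_len = 20 and pkt_len = 1514, there is no
 * outer header, so mask = 0 and lso_sb = 54; paylen = 1514 - 54 = 1460,
 * which is subtracted from the IP total-length field so that hardware
 * can restamp the correct per-segment length during segmentation.
 */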
static __rte_always_inline void
cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
		      const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}
	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		send_hdr->w0.total = m->data_len;
		send_hdr->w0.aura =
			roc_npa_aura_handle_to_aura(m->pool->pool_id);
	}

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

		/* If no tunnel header is present, shift the inner IL3/IL4
		 * fields into the OL3/OL4 positions so that hardware uses
		 * OL3/OL4 for the header checksums.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
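
		/*
		 * Illustrative effect (assumed W1 layout: pointers in the
		 * low 32 bits, types in the high 32 bits): with no outer
		 * header (ol3type == 0), mask == 1, so IL3PTR/IL4PTR shift
		 * down 16 bits into OL3PTR/OL4PTR and IL3TYPE/IL4TYPE shift
		 * down 8 bits into OL3TYPE/OL4TYPE; with a tunnel header
		 * (ol3type != 0), mask == 0 and W1 is left untouched.
		 */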
	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);
	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
	}
	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}
	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}
	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 * mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
			/* Ensuring mbuf fields which got updated in
			 * cnxk_nix_prefree_seg are written before LMTST.
			 */
			rte_io_wmb();
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
	}
}
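
/*
 * Resulting command layout (illustrative): with NIX_TX_NEED_EXT_HDR the
 * 64-bit words of cmd[] are SEND_HDR (cmd[0..1]), SEND_EXT (cmd[2..3]),
 * then SG + iova (cmd[4..5]); without it, the SG pair directly follows
 * the header at cmd[2..3].
 */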
static __rte_always_inline void
cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
			     const uint64_t ol_flags, const uint16_t no_segdw,
			     const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		struct nix_send_mem_s *send_mem;
		uint16_t off = (no_segdw - 1) << 1;
		const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);

		send_mem = (struct nix_send_mem_s *)(cmd + off);
		if (flags & NIX_TX_MULTI_SEG_F) {
			/* Retrieving the default desc values */
			cmd[off] = send_mem_desc[6];

			/* Using compiler barrier to avoid violation of C
			 * aliasing rules.
			 */
			rte_compiler_barrier();
		}

		/* For packets without PKT_TX_IEEE1588_TMST the Tx tstamp
		 * should not be recorded; hence change the alg type to
		 * NIX_SENDMEMALG_SET and point the send mem addr field to
		 * the next 8 bytes so that the registered Tx tstamp address
		 * is not corrupted.
		 */
		send_mem->w0.cn9k.alg =
			NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);

		send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
					      (is_ol_tstamp));
	}
}
static __rte_always_inline void
cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
		  const uint32_t flags)
{
	uint64_t lmt_status;

	do {
		roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}
static __rte_always_inline void
cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
{
	roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
}
static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
{
	return roc_lmt_submit_ldeor(io_addr);
}
static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
{
	/* LDEORL variant issues the LMTST with release semantics */
	return roc_lmt_submit_ldeorl(io_addr);
}
static __rte_always_inline uint16_t
cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[3 + off];

	i = 0;
	nb_segs = m->nb_segs;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
			/* Commit changes to mbuf */
			rte_io_wmb();
		}
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
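
/*
 * Worked example (illustrative): for a 3-segment mbuf chain with an
 * extended header (off = 2), the SG unit spans one SG word plus three
 * iova words = 4 dwords, rounded up to 2 16B units; adding the default
 * units ((off >> 1) + 1 = 2 for SEND_HDR + SEND_EXT) gives segdw = 4,
 * i.e. sizem1 = 3, when no timestamp descriptor is appended.
 */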
static __rte_always_inline void
cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
{
	roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
}
static __rte_always_inline void
cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
		       uint16_t segdw)
{
	uint64_t lmt_status;

	do {
		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}
static __rte_always_inline void
cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
			       rte_iova_t io_addr, uint16_t segdw)
{
	uint64_t lmt_status;

	rte_io_wmb();
	do {
		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		   uint64_t *cmd, const uint16_t flags)
{
	struct cn9k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	void *lmt_addr = txq->lmt_addr;
	uint64_t lso_tun_fmt;
	uint16_t i;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;

		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Commit any changes to the packets here, as no further changes
	 * are made to them unless no-fast-free is enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
		cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
					     tx_pkts[i]->ol_flags, 4, flags);
		cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
	}

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn9k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	void *lmt_addr = txq->lmt_addr;
	uint64_t lso_tun_fmt;
	uint16_t segdw;
	uint64_t i;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;

		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Commit any changes to the packets here, as no further changes
	 * are made to them unless no-fast-free is enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
		segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
		cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
					     tx_pkts[i]->ol_flags, segdw,
					     flags);
		cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
	}

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}
#if defined(RTE_ARCH_ARM64)

#define NIX_DESCS_PER_LOOP 4
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn9k_eth_txq *txq = tx_queue;
	uint64_t *lmt_addr = txq->lmt_addr;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint64_t lmt_status, i;
	uint16_t pkts_left;
	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	/* Commit any changes to the packets here, as no further changes
	 * are made to them unless no-fast-free is enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
	senddesc23_w0 = senddesc01_w0;
	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
	sgdesc23_w0 = sgdesc01_w0;
	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;
		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbufs' olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);
		/* Move mbuf pointers to point at the pool field */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}
		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));
		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
				0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
				0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
				0x02, /* PKT_TX_IPV4 */
				0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
				0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
				0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
				0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
				0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_TCP_CKSUM
				       */
				0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_SCTP_CKSUM
				       */
				0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
				       * PKT_TX_UDP_CKSUM
				       */
			};
			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and the rest
			 * of the bits cleared to zero
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e. Bits 48:55 of iltype
			 * and place them in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};
			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);
			/* Pick only relevant fields i.e. Bits 56:63 of oltype
			 * and place them in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
					0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
					0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
					0x03, /* PKT_TX_IP_CKSUM */
					0x13, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
					0x02, /* PKT_TX_IPV4 */
					0x12, /* PKT_TX_IPV4 |
					       * PKT_TX_TCP_CKSUM
					       */
					0x22, /* PKT_TX_IPV4 |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x32, /* PKT_TX_IPV4 |
					       * PKT_TX_UDP_CKSUM
					       */
					0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
					0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_TCP_CKSUM
					       */
					0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_SCTP_CKSUM
					       */
					0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
					       * PKT_TX_UDP_CKSUM
					       */
				},
				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};
			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e. Bits 48:55 of iltype
			 * and Bits 56:63 of oltype and place them in the
			 * corresponding place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}
		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf0)->pool,
					(void **)&mbuf0, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf1)->pool,
					(void **)&mbuf1, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf2)->pool,
					(void **)&mbuf2, 1, 0);

			if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				__mempool_check_cookies(
					((struct rte_mbuf *)mbuf3)->pool,
					(void **)&mbuf3, 1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
			/* Ensuring mbuf fields which got updated in
			 * cnxk_nix_prefree_seg are written before LMTST.
			 */
			rte_io_wmb();
		} else {
			/* Move mbufs to iova */
			mbuf0 = (uint64_t *)tx_pkts[0];
			mbuf1 = (uint64_t *)tx_pkts[1];
			mbuf2 = (uint64_t *)tx_pkts[2];
			mbuf3 = (uint64_t *)tx_pkts[3];

			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf0)->pool,
				(void **)&mbuf0, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf1)->pool,
				(void **)&mbuf1, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf2)->pool,
				(void **)&mbuf2, 1, 0);

			__mempool_check_cookies(
				((struct rte_mbuf *)mbuf3)->pool,
				(void **)&mbuf3, 1, 0);
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
			rte_io_wmb();
#endif
		}
		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
		do {
			vst1q_u64(lmt_addr, cmd0[0]);
			vst1q_u64(lmt_addr + 2, cmd1[0]);
			vst1q_u64(lmt_addr + 4, cmd0[1]);
			vst1q_u64(lmt_addr + 6, cmd1[1]);
			vst1q_u64(lmt_addr + 8, cmd0[2]);
			vst1q_u64(lmt_addr + 10, cmd1[2]);
			vst1q_u64(lmt_addr + 12, cmd0[3]);
			vst1q_u64(lmt_addr + 14, cmd1[3]);
			lmt_status = roc_lmt_submit_ldeor(io_addr);
		} while (lmt_status == 0);
		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}
	if (unlikely(pkts_left))
		pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
					   flags);

	return pkts;
}

#else
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif
#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
#define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F
/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES						       \
T(no_offload,				0, 0, 0, 0, 0, 0,	4,	       \
		NIX_TX_OFFLOAD_NONE)					       \
T(l3l4csum,				0, 0, 0, 0, 0, 1,	4,	       \
		L3L4CSUM_F)						       \
T(ol3ol4csum,				0, 0, 0, 0, 1, 0,	4,	       \
		OL3OL4CSUM_F)						       \
T(ol3ol4csum_l3l4csum,			0, 0, 0, 0, 1, 1,	4,	       \
		OL3OL4CSUM_F | L3L4CSUM_F)				       \
T(vlan,					0, 0, 0, 1, 0, 0,	6,	       \
		VLAN_F)							       \
T(vlan_l3l4csum,			0, 0, 0, 1, 0, 1,	6,	       \
		VLAN_F | L3L4CSUM_F)					       \
T(vlan_ol3ol4csum,			0, 0, 0, 1, 1, 0,	6,	       \
		VLAN_F | OL3OL4CSUM_F)					       \
T(vlan_ol3ol4csum_l3l4csum,		0, 0, 0, 1, 1, 1,	6,	       \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(noff,					0, 0, 1, 0, 0, 0,	4,	       \
		NOFF_F)							       \
T(noff_l3l4csum,			0, 0, 1, 0, 0, 1,	4,	       \
		NOFF_F | L3L4CSUM_F)					       \
T(noff_ol3ol4csum,			0, 0, 1, 0, 1, 0,	4,	       \
		NOFF_F | OL3OL4CSUM_F)					       \
T(noff_ol3ol4csum_l3l4csum,		0, 0, 1, 0, 1, 1,	4,	       \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(noff_vlan,				0, 0, 1, 1, 0, 0,	6,	       \
		NOFF_F | VLAN_F)					       \
T(noff_vlan_l3l4csum,			0, 0, 1, 1, 0, 1,	6,	       \
		NOFF_F | VLAN_F | L3L4CSUM_F)				       \
T(noff_vlan_ol3ol4csum,			0, 0, 1, 1, 1, 0,	6,	       \
		NOFF_F | VLAN_F | OL3OL4CSUM_F)				       \
T(noff_vlan_ol3ol4csum_l3l4csum,	0, 0, 1, 1, 1, 1,	6,	       \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(tso,					0, 1, 0, 0, 0, 0,	6,	       \
		TSO_F)							       \
T(tso_l3l4csum,				0, 1, 0, 0, 0, 1,	6,	       \
		TSO_F | L3L4CSUM_F)					       \
T(tso_ol3ol4csum,			0, 1, 0, 0, 1, 0,	6,	       \
		TSO_F | OL3OL4CSUM_F)					       \
T(tso_ol3ol4csum_l3l4csum,		0, 1, 0, 0, 1, 1,	6,	       \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(tso_vlan,				0, 1, 0, 1, 0, 0,	6,	       \
		TSO_F | VLAN_F)						       \
T(tso_vlan_l3l4csum,			0, 1, 0, 1, 0, 1,	6,	       \
		TSO_F | VLAN_F | L3L4CSUM_F)				       \
T(tso_vlan_ol3ol4csum,			0, 1, 0, 1, 1, 0,	6,	       \
		TSO_F | VLAN_F | OL3OL4CSUM_F)				       \
T(tso_vlan_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 1,	6,	       \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(tso_noff,				0, 1, 1, 0, 0, 0,	6,	       \
		TSO_F | NOFF_F)						       \
T(tso_noff_l3l4csum,			0, 1, 1, 0, 0, 1,	6,	       \
		TSO_F | NOFF_F | L3L4CSUM_F)				       \
T(tso_noff_ol3ol4csum,			0, 1, 1, 0, 1, 0,	6,	       \
		TSO_F | NOFF_F | OL3OL4CSUM_F)				       \
T(tso_noff_ol3ol4csum_l3l4csum,		0, 1, 1, 0, 1, 1,	6,	       \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(tso_noff_vlan,			0, 1, 1, 1, 0, 0,	6,	       \
		TSO_F | NOFF_F | VLAN_F)				       \
T(tso_noff_vlan_l3l4csum,		0, 1, 1, 1, 0, 1,	6,	       \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)			       \
T(tso_noff_vlan_ol3ol4csum,		0, 1, 1, 1, 1, 0,	6,	       \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			       \
T(tso_noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 1,	6,	       \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	       \
T(ts,					1, 0, 0, 0, 0, 0,	8,	       \
		TSP_F)							       \
T(ts_l3l4csum,				1, 0, 0, 0, 0, 1,	8,	       \
		TSP_F | L3L4CSUM_F)					       \
T(ts_ol3ol4csum,			1, 0, 0, 0, 1, 0,	8,	       \
		TSP_F | OL3OL4CSUM_F)					       \
T(ts_ol3ol4csum_l3l4csum,		1, 0, 0, 0, 1, 1,	8,	       \
		TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)			       \
T(ts_vlan,				1, 0, 0, 1, 0, 0,	8,	       \
		TSP_F | VLAN_F)						       \
T(ts_vlan_l3l4csum,			1, 0, 0, 1, 0, 1,	8,	       \
		TSP_F | VLAN_F | L3L4CSUM_F)				       \
T(ts_vlan_ol3ol4csum,			1, 0, 0, 1, 1, 0,	8,	       \
		TSP_F | VLAN_F | OL3OL4CSUM_F)				       \
T(ts_vlan_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 1,	8,	       \
		TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts_noff,				1, 0, 1, 0, 0, 0,	8,	       \
		TSP_F | NOFF_F)						       \
T(ts_noff_l3l4csum,			1, 0, 1, 0, 0, 1,	8,	       \
		TSP_F | NOFF_F | L3L4CSUM_F)				       \
T(ts_noff_ol3ol4csum,			1, 0, 1, 0, 1, 0,	8,	       \
		TSP_F | NOFF_F | OL3OL4CSUM_F)				       \
T(ts_noff_ol3ol4csum_l3l4csum,		1, 0, 1, 0, 1, 1,	8,	       \
		TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts_noff_vlan,				1, 0, 1, 1, 0, 0,	8,	       \
		TSP_F | NOFF_F | VLAN_F)				       \
T(ts_noff_vlan_l3l4csum,		1, 0, 1, 1, 0, 1,	8,	       \
		TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)			       \
T(ts_noff_vlan_ol3ol4csum,		1, 0, 1, 1, 1, 0,	8,	       \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)			       \
T(ts_noff_vlan_ol3ol4csum_l3l4csum,	1, 0, 1, 1, 1, 1,	8,	       \
		TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	       \
T(ts_tso,				1, 1, 0, 0, 0, 0,	8,	       \
		TSP_F | TSO_F)						       \
T(ts_tso_l3l4csum,			1, 1, 0, 0, 0, 1,	8,	       \
		TSP_F | TSO_F | L3L4CSUM_F)				       \
T(ts_tso_ol3ol4csum,			1, 1, 0, 0, 1, 0,	8,	       \
		TSP_F | TSO_F | OL3OL4CSUM_F)				       \
T(ts_tso_ol3ol4csum_l3l4csum,		1, 1, 0, 0, 1, 1,	8,	       \
		TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)		       \
T(ts_tso_vlan,				1, 1, 0, 1, 0, 0,	8,	       \
		TSP_F | TSO_F | VLAN_F)					       \
T(ts_tso_vlan_l3l4csum,			1, 1, 0, 1, 0, 1,	8,	       \
		TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)			       \
T(ts_tso_vlan_ol3ol4csum,		1, 1, 0, 1, 1, 0,	8,	       \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)			       \
T(ts_tso_vlan_ol3ol4csum_l3l4csum,	1, 1, 0, 1, 1, 1,	8,	       \
		TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)	       \
T(ts_tso_noff,				1, 1, 1, 0, 0, 0,	8,	       \
		TSP_F | TSO_F | NOFF_F)					       \
T(ts_tso_noff_l3l4csum,			1, 1, 1, 0, 0, 1,	8,	       \
		TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)			       \
T(ts_tso_noff_ol3ol4csum,		1, 1, 1, 0, 1, 0,	8,	       \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)			       \
T(ts_tso_noff_ol3ol4csum_l3l4csum,	1, 1, 1, 0, 1, 1,	8,	       \
		TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)	       \
T(ts_tso_noff_vlan,			1, 1, 1, 1, 0, 0,	8,	       \
		TSP_F | TSO_F | NOFF_F | VLAN_F)			       \
T(ts_tso_noff_vlan_l3l4csum,		1, 1, 1, 1, 0, 1,	8,	       \
		TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)		       \
T(ts_tso_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 1, 0,	8,	       \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)		       \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1,	8,	       \
		TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(           \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
									       \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
NIX_TX_FASTPATH_MODES
#undef T
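
/* For reference (illustrative): expanding NIX_TX_FASTPATH_MODES with the
 * T() macro above declares one scalar, one multi-segment and one vector
 * variant per mode, e.g. for the "vlan_l3l4csum" row:
 *
 *	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vlan_l3l4csum(
 *		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
 *
 * built with flags = VLAN_F | L3L4CSUM_F; the sz column (6 here) gives the
 * command size in 64-bit words.
 */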
#endif /* __CN9K_TX_H__ */