1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
/* Per-queue Tx offload flag bits. A fixed combination of these is baked
 * into each generated xmit function variant at compile time (the "flags"
 * parameter of the inline helpers below is always a compile-time const).
 */
9 #define NIX_TX_OFFLOAD_NONE (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
17 /* Flags to control xmit_prepare function.
18 * Defining it from backwards to denote its been
19 * not used as offload flags to pick function
21 #define NIX_TX_MULTI_SEG_F BIT(15)
/* Offloads for which SEND_HDR W1 must be composed per packet. */
23 #define NIX_TX_NEED_SEND_HDR_W1 \
24 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
25 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
/* Offloads that require the SEND_EXT subdescriptor (VLAN/QinQ insertion,
 * Tx timestamp, TSO).
 */
27 #define NIX_TX_NEED_EXT_HDR \
28 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
/* Refresh txq->fc_cache_pkts from the shared *fc_mem counter (converting
 * free SQB buffers to packets via sqes_per_sqb_log2) and, if there is
 * still not enough room for "pkts" packets, return from the calling
 * xmit function. No inline comments can be added inside the macro body
 * without disturbing the line continuations.
 */
31 #define NIX_XMIT_FC_OR_RETURN(txq, pkts) \
33 /* Cached value is low, Update the fc_cache_pkts */ \
34 if (unlikely((txq)->fc_cache_pkts < (pkts))) { \
35 /* Multiply with sqe_per_sqb to express in pkts */ \
36 (txq)->fc_cache_pkts = \
37 ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) \
38 << (txq)->sqes_per_sqb_log2; \
39 /* Check it again for the room */ \
40 if (unlikely((txq)->fc_cache_pkts < (pkts))) \
/* Returns the number of Tx subdescriptors required for a given offload
 * flag combination: largest when tstamp is enabled, smaller for
 * vlan/tso-only, minimal otherwise (exact values elided in this view).
 */
45 /* Function to determine no of tx subdesc required in case ext
46 * sub desc is enabled.
48 static __rte_always_inline int
49 cn9k_nix_tx_ext_subs(const uint16_t flags)
51 return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
54 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
/* Pre-adjust packet headers for TSO: subtract the aggregate payload
 * length (pkt_len minus the L2..L4 header block, including outer headers
 * for tunnels) from the inner IP length field, and for tunnel TSO also
 * from the outer IP and (for UDP tunnels) outer UDP length fields, so HW
 * segmentation produces per-segment lengths. No-op unless both the TSO
 * offload flag and PKT_TX_TCP_SEG are set.
 */
59 static __rte_always_inline void
60 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
62 uint64_t mask, ol_flags = m->ol_flags;
64 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
65 uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
66 uint16_t *iplen, *oiplen, *oudplen;
67 uint16_t lso_sb, paylen;
/* lso_sb = total header bytes preceding the TSO payload; outer L2/L3
 * lengths contribute only when an outer IPv4/IPv6 flag is present
 * (mask is all-ones or zero).
 */
69 mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
70 lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
71 m->l2_len + m->l3_len + m->l4_len;
73 /* Reduce payload len from base headers */
74 paylen = m->pkt_len - lso_sb;
76 /* Get iplen position assuming no tunnel hdr */
/* Length-field offset inside the IP header: 2 for IPv4, 4 for IPv6. */
77 iplen = (uint16_t *)(mdata + m->l2_len +
78 (2 << !!(ol_flags & PKT_TX_IPV6)));
79 /* Handle tunnel tso */
80 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
81 (ol_flags & PKT_TX_TUNNEL_MASK)) {
/* Bitmap lookup keyed by the tunnel-type field (ol_flags >> 45):
 * nonzero when the tunnel is UDP-based.
 */
82 const uint8_t is_udp_tun =
83 (CNXK_NIX_UDP_TUN_BITMASK >>
84 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
87 oiplen = (uint16_t *)(mdata + m->outer_l2_len +
90 *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
93 /* Update format for UDP tunneled packet */
95 oudplen = (uint16_t *)(mdata + m->outer_l2_len +
97 *oudplen = rte_cpu_to_be_16(
98 rte_be_to_cpu_16(*oudplen) - paylen);
101 /* Update iplen position to inner ip hdr */
102 iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
104 (2 << !!(ol_flags & PKT_TX_IPV6)));
/* Finally trim the segmented payload out of the (inner) IP length. */
107 *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
/* Compose the per-packet NIX send descriptor into "cmd":
 *  - SEND_HDR W0 (total length, aura for single-seg) and W1 (checksum
 *    types and header pointers),
 *  - optional SEND_EXT (VLAN/QinQ insertion, LSO setup) when
 *    NIX_TX_NEED_EXT_HDR is compiled in (SG then starts at cmd[4]
 *    instead of cmd[2]),
 *  - the single SG entry for non-multiseg Tx,
 *  - the DF (don't-free) bit via cnxk_nix_prefree_seg() when fast free
 *    is disabled (MBUF_NOFF_F).
 * lso_tun_fmt holds the per-queue packed LSO format indices for tunnel
 * TSO, indexed by a shift derived from tunnel/IP-version flags.
 */
111 static __rte_always_inline void
112 cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
113 const uint64_t lso_tun_fmt)
115 struct nix_send_ext_s *send_hdr_ext;
116 struct nix_send_hdr_s *send_hdr;
117 uint64_t ol_flags = 0, mask;
118 union nix_send_hdr_w1_u w1;
119 union nix_send_sg_s *sg;
121 send_hdr = (struct nix_send_hdr_s *)cmd;
122 if (flags & NIX_TX_NEED_EXT_HDR) {
123 send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
124 sg = (union nix_send_sg_s *)(cmd + 4);
125 /* Clear previous markings */
126 send_hdr_ext->w0.lso = 0;
127 send_hdr_ext->w1.u = 0;
129 sg = (union nix_send_sg_s *)(cmd + 2);
132 if (flags & NIX_TX_NEED_SEND_HDR_W1) {
133 ol_flags = m->ol_flags;
137 if (!(flags & NIX_TX_MULTI_SEG_F)) {
138 send_hdr->w0.total = m->data_len;
140 roc_npa_aura_handle_to_aura(m->pool->pool_id);
145 * 3 => IPV4 with csum
147 * L3type and L3ptr needs to be set for either
148 * L3 csum or L4 csum or LSO
/* Case 1: both outer and inner checksum offloads compiled in -
 * fill OL3/OL4 and IL3/IL4 fields of W1.
 */
152 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
153 (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
154 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
155 const uint8_t ol3type =
156 ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
157 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
158 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
161 w1.ol3type = ol3type;
/* mask zeroes the OL pointers when no outer L3 type is present. */
162 mask = 0xffffull << ((!!ol3type) << 4);
163 w1.ol3ptr = ~mask & m->outer_l2_len;
164 w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
167 w1.ol4type = csum + (csum << 1);
170 w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
171 ((!!(ol_flags & PKT_TX_IPV6)) << 2);
172 w1.il3ptr = w1.ol4ptr + m->l2_len;
173 w1.il4ptr = w1.il3ptr + m->l3_len;
174 /* Increment it by 1 if it is IPV4 as 3 is with csum */
175 w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);
178 w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
180 /* In case of no tunnel header use only
181 * shift IL3/IL4 fields a bit to use
182 * OL3/OL4 for header checksum
185 w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
186 ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
/* Case 2: only outer checksum offload compiled in. */
188 } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
189 const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
190 const uint8_t outer_l2_len = m->outer_l2_len;
193 w1.ol3ptr = outer_l2_len;
194 w1.ol4ptr = outer_l2_len + m->outer_l3_len;
195 /* Increment it by 1 if it is IPV4 as 3 is with csum */
196 w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
197 ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
198 !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);
201 w1.ol4type = csum + (csum << 1);
/* Case 3: only inner checksum offload compiled in - single-header
 * packets use the OL fields.
 */
203 } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
204 const uint8_t l2_len = m->l2_len;
206 /* Always use OLXPTR and OLXTYPE when only
207 * when one header is present
212 w1.ol4ptr = l2_len + m->l3_len;
213 /* Increment it by 1 if it is IPV4 as 3 is with csum */
214 w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
215 ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
216 !!(ol_flags & PKT_TX_IP_CKSUM);
219 w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
222 if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
223 send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
224 /* HW will update ptr after vlan0 update */
225 send_hdr_ext->w1.vlan1_ins_ptr = 12;
226 send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
228 send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
229 /* 2B before end of l2 header */
230 send_hdr_ext->w1.vlan0_ins_ptr = 12;
231 send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
234 if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
/* lso_sb = outer L4 end when there is no inner IP type (plain TSO),
 * else inner L4 end (tunnel TSO); plus L4 header length.
 */
238 mask = -(!w1.il3type);
239 lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
241 send_hdr_ext->w0.lso_sb = lso_sb;
242 send_hdr_ext->w0.lso = 1;
243 send_hdr_ext->w0.lso_mps = m->tso_segsz;
244 send_hdr_ext->w0.lso_format =
245 NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
246 w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
248 /* Handle tunnel tso */
249 if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
250 (ol_flags & PKT_TX_TUNNEL_MASK)) {
251 const uint8_t is_udp_tun =
252 (CNXK_NIX_UDP_TUN_BITMASK >>
253 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
255 uint8_t shift = is_udp_tun ? 32 : 0;
257 shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
258 shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
260 w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
261 w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
262 /* Update format for UDP tunneled packet */
263 send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
267 if (flags & NIX_TX_NEED_SEND_HDR_W1)
268 send_hdr->w1.u = w1.u;
/* Single-seg: one SG entry (length + data iova) directly after SG hdr. */
270 if (!(flags & NIX_TX_MULTI_SEG_F)) {
271 sg->seg1_size = m->data_len;
272 *(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);
274 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
275 /* DF bit = 1 if refcount of current mbuf or parent mbuf
277 * DF bit = 0 otherwise
279 send_hdr->w0.df = cnxk_nix_prefree_seg(m);
280 /* Ensuring mbuf fields which got updated in
281 * cnxk_nix_prefree_seg are written before LMTST.
285 /* Mark mempool object as "put" since it is freed by NIX */
286 if (!send_hdr->w0.df)
287 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
/* Patch the last subdescriptor of the command into a SEND_MEM that makes
 * HW record the Tx timestamp. For packets without PKT_TX_IEEE1588_TMST
 * the alg is downgraded (SETTSTMP - 1) and the destination address moved
 * to the next 8 bytes so the genuine timestamp slot is not clobbered.
 * Compiled out entirely unless NIX_TX_OFFLOAD_TSTAMP_F is set.
 */
291 static __rte_always_inline void
292 cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
293 const uint64_t ol_flags, const uint16_t no_segdw,
294 const uint16_t flags)
296 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
297 struct nix_send_mem_s *send_mem;
/* off = index (in 8B words) of the last 16B subdescriptor. */
298 uint16_t off = (no_segdw - 1) << 1;
299 const uint8_t is_ol_tstamp = !(ol_flags & PKT_TX_IEEE1588_TMST);
301 send_mem = (struct nix_send_mem_s *)(cmd + off);
302 if (flags & NIX_TX_MULTI_SEG_F) {
303 /* Retrieving the default desc values */
304 cmd[off] = send_mem_desc[6];
306 /* Using compiler barier to avoid voilation of C
309 rte_compiler_barrier();
312 /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
313 * should not be recorded, hence changing the alg type to
314 * NIX_SENDMEMALG_SET and also changing send mem addr field to
315 * next 8 bytes as it corrpt the actual tx tstamp registered
318 send_mem->w0.cn9k.alg =
319 NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
321 send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
/* Copy one prepared single-seg command to the LMT line and submit it via
 * LDEOR, retrying until the LMTST is confirmed (nonzero status means the
 * atomic store to the SQ I/O address completed without interruption).
 */
326 static __rte_always_inline void
327 cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
328 const uint32_t flags)
333 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
334 lmt_status = roc_lmt_submit_ldeor(io_addr);
335 } while (lmt_status == 0);
/* Stage the command into the LMT line without submitting; pair with
 * cn9k_nix_xmit_submit_lmt[_release]() below.
 */
338 static __rte_always_inline void
339 cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
341 roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
/* Submit a previously staged LMT line; returns the LDEOR status
 * (0 means the LMTST was interrupted and must be retried by the caller).
 */
344 static __rte_always_inline uint64_t
345 cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
347 return roc_lmt_submit_ldeor(io_addr);
/* Same as cn9k_nix_xmit_submit_lmt() but uses the release-ordered LDEORL
 * variant so prior stores are visible before the submission.
 */
350 static __rte_always_inline uint64_t
351 cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
353 return roc_lmt_submit_ldeorl(io_addr);
/* Build the scatter-gather list for a multi-segment mbuf chain:
 * walks nb_segs segments, packing up to three 16-bit lengths per SG
 * header word (sg_u) and one iova per slist entry, opening a fresh SG
 * subdescriptor after every third segment. When fast free is disabled
 * each segment's don't-free bit (bit 55+i) is set from
 * cnxk_nix_prefree_seg(). Returns the descriptor size "segdw" in 16B
 * (2 x 8B word) units, which is also stored as sizem1 in SEND_HDR W0.
 */
356 static __rte_always_inline uint16_t
357 cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
359 struct nix_send_hdr_s *send_hdr;
360 union nix_send_sg_s *sg;
361 struct rte_mbuf *m_next;
362 uint64_t *slist, sg_u;
367 send_hdr = (struct nix_send_hdr_s *)cmd;
368 send_hdr->w0.total = m->pkt_len;
369 send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
/* "off" accounts for the optional SEND_EXT subdescriptor. */
371 if (flags & NIX_TX_NEED_EXT_HDR)
376 sg = (union nix_send_sg_s *)&cmd[2 + off];
377 /* Clear sg->u header before use */
378 sg->u &= 0xFC00000000000000;
380 slist = &cmd[3 + off];
383 nb_segs = m->nb_segs;
385 /* Fill mbuf segments */
388 sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
389 *slist = rte_mbuf_data_iova(m);
390 /* Set invert df if buffer is not to be freed by H/W */
391 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
392 sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
393 /* Commit changes to mbuf */
396 /* Mark mempool object as "put" since it is freed by NIX */
397 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
398 if (!(sg_u & (1ULL << (i + 55))))
399 __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
/* Three segments per SG subdesc: flush and start a new one. */
405 if (i > 2 && nb_segs) {
407 /* Next SG subdesc */
408 *(uint64_t *)slist = sg_u & 0xFC00000000000000;
411 sg = (union nix_send_sg_s *)slist;
/* Descriptor size: consumed 8B words rounded up to 16B units, plus
 * header/ext subdescs and an extra unit for the timestamp SEND_MEM.
 */
420 segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
421 /* Roundup extra dwords to multiple of 2 */
422 segdw = (segdw >> 1) + (segdw & 0x1);
424 segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
425 send_hdr->w0.sizem1 = segdw - 1;
/* Stage a multi-seg command (segdw 16B units) into the LMT line without
 * submitting.
 */
430 static __rte_always_inline void
431 cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
433 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
/* Copy one multi-seg command to the LMT line and submit via LDEOR,
 * retrying until the LMTST completes (nonzero status).
 */
436 static __rte_always_inline void
437 cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
443 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
444 lmt_status = roc_lmt_submit_ldeor(io_addr);
445 } while (lmt_status == 0);
/* Release-ordered variant of cn9k_nix_xmit_mseg_one(). NOTE(review): the
 * visible loop body submits with plain roc_lmt_submit_ldeor(); the
 * release semantics presumably come from an elided barrier/first
 * iteration - confirm against the full file.
 */
448 static __rte_always_inline void
449 cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
450 rte_iova_t io_addr, uint16_t segdw)
456 roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
457 lmt_status = roc_lmt_submit_ldeor(io_addr);
458 } while (lmt_status == 0);
/* Scalar single-segment Tx burst. Checks SQ flow control (may return
 * early with fewer/zero packets sent), preloads the queue's default
 * command words into "cmd", pre-adjusts TSO headers for the whole burst,
 * then per packet: fills the descriptor, optionally appends the
 * timestamp SEND_MEM (fixed 4-subdesc command) and issues one LMTST.
 * Decrements the cached flow-control count by the packets sent.
 */
461 static __rte_always_inline uint16_t
462 cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
463 uint64_t *cmd, const uint16_t flags)
465 struct cn9k_eth_txq *txq = tx_queue;
466 const rte_iova_t io_addr = txq->io_addr;
467 void *lmt_addr = txq->lmt_addr;
468 uint64_t lso_tun_fmt;
471 NIX_XMIT_FC_OR_RETURN(txq, pkts);
473 roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
475 /* Perform header writes before barrier for TSO */
476 if (flags & NIX_TX_OFFLOAD_TSO_F) {
477 lso_tun_fmt = txq->lso_tun_fmt;
479 for (i = 0; i < pkts; i++)
480 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
483 /* Lets commit any changes in the packet here as no further changes
484 * to the packet will be done unless no fast free is enabled.
486 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
489 for (i = 0; i < pkts; i++) {
490 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
491 cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
492 tx_pkts[i]->ol_flags, 4, flags);
493 cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
496 /* Reduce the cached count */
497 txq->fc_cache_pkts -= pkts;
/* Scalar multi-segment Tx burst. Same flow as cn9k_nix_xmit_pkts() but
 * each packet additionally gets its SG list built by
 * cn9k_nix_prepare_mseg(), whose returned size (segdw, 16B units) drives
 * both the timestamp subdesc placement and the LMTST copy length.
 */
502 static __rte_always_inline uint16_t
503 cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
504 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
506 struct cn9k_eth_txq *txq = tx_queue;
507 const rte_iova_t io_addr = txq->io_addr;
508 void *lmt_addr = txq->lmt_addr;
509 uint64_t lso_tun_fmt;
513 NIX_XMIT_FC_OR_RETURN(txq, pkts);
515 roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
517 /* Perform header writes before barrier for TSO */
518 if (flags & NIX_TX_OFFLOAD_TSO_F) {
519 lso_tun_fmt = txq->lso_tun_fmt;
521 for (i = 0; i < pkts; i++)
522 cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
525 /* Lets commit any changes in the packet here as no further changes
526 * to the packet will be done unless no fast free is enabled.
528 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
531 for (i = 0; i < pkts; i++) {
532 cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
533 segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
534 cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
535 tx_pkts[i]->ol_flags, segdw,
537 cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
540 /* Reduce the cached count */
541 txq->fc_cache_pkts -= pkts;
546 #if defined(RTE_ARCH_ARM64)
548 #define NIX_DESCS_PER_LOOP 4
549 static __rte_always_inline uint16_t
550 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
551 uint16_t pkts, uint64_t *cmd, const uint16_t flags)
553 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
554 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
555 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
556 cmd2[NIX_DESCS_PER_LOOP];
557 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
558 uint64x2_t senddesc01_w0, senddesc23_w0;
559 uint64x2_t senddesc01_w1, senddesc23_w1;
560 uint64x2_t sendext01_w0, sendext23_w0;
561 uint64x2_t sendext01_w1, sendext23_w1;
562 uint64x2_t sgdesc01_w0, sgdesc23_w0;
563 uint64x2_t sgdesc01_w1, sgdesc23_w1;
564 struct cn9k_eth_txq *txq = tx_queue;
565 uint64_t *lmt_addr = txq->lmt_addr;
566 rte_iova_t io_addr = txq->io_addr;
567 uint64x2_t ltypes01, ltypes23;
568 uint64x2_t xtmp128, ytmp128;
569 uint64x2_t xmask01, xmask23;
570 uint64_t lmt_status, i;
573 NIX_XMIT_FC_OR_RETURN(txq, pkts);
575 pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
576 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
578 /* Reduce the cached count */
579 txq->fc_cache_pkts -= pkts;
581 /* Lets commit any changes in the packet here as no further changes
582 * to the packet will be done unless no fast free is enabled.
584 if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
587 senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
588 senddesc23_w0 = senddesc01_w0;
589 senddesc01_w1 = vdupq_n_u64(0);
590 senddesc23_w1 = senddesc01_w1;
592 /* Load command defaults into vector variables. */
593 if (flags & NIX_TX_NEED_EXT_HDR) {
594 sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
595 sendext23_w0 = sendext01_w0;
596 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
597 sendext23_w1 = sendext01_w1;
598 sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
599 sgdesc23_w0 = sgdesc01_w0;
601 sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
602 sgdesc23_w0 = sgdesc01_w0;
605 for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
606 /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
608 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
609 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
611 senddesc23_w0 = senddesc01_w0;
612 sgdesc23_w0 = sgdesc01_w0;
614 /* Clear vlan enables. */
615 if (flags & NIX_TX_NEED_EXT_HDR) {
616 sendext01_w1 = vbicq_u64(sendext01_w1,
617 vdupq_n_u64(0x3FFFF00FFFF00));
618 sendext23_w1 = sendext01_w1;
621 /* Move mbufs to iova */
622 mbuf0 = (uint64_t *)tx_pkts[0];
623 mbuf1 = (uint64_t *)tx_pkts[1];
624 mbuf2 = (uint64_t *)tx_pkts[2];
625 mbuf3 = (uint64_t *)tx_pkts[3];
627 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
628 offsetof(struct rte_mbuf, buf_iova));
629 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
630 offsetof(struct rte_mbuf, buf_iova));
631 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
632 offsetof(struct rte_mbuf, buf_iova));
633 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
634 offsetof(struct rte_mbuf, buf_iova));
636 * Get mbuf's, olflags, iova, pktlen, dataoff
637 * dataoff_iovaX.D[0] = iova,
638 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
639 * len_olflagsX.D[0] = ol_flags,
640 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
642 dataoff_iova0 = vld1q_u64(mbuf0);
643 len_olflags0 = vld1q_u64(mbuf0 + 2);
644 dataoff_iova1 = vld1q_u64(mbuf1);
645 len_olflags1 = vld1q_u64(mbuf1 + 2);
646 dataoff_iova2 = vld1q_u64(mbuf2);
647 len_olflags2 = vld1q_u64(mbuf2 + 2);
648 dataoff_iova3 = vld1q_u64(mbuf3);
649 len_olflags3 = vld1q_u64(mbuf3 + 2);
651 /* Move mbufs to point pool */
652 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
653 offsetof(struct rte_mbuf, pool) -
654 offsetof(struct rte_mbuf, buf_iova));
655 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
656 offsetof(struct rte_mbuf, pool) -
657 offsetof(struct rte_mbuf, buf_iova));
658 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
659 offsetof(struct rte_mbuf, pool) -
660 offsetof(struct rte_mbuf, buf_iova));
661 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
662 offsetof(struct rte_mbuf, pool) -
663 offsetof(struct rte_mbuf, buf_iova));
665 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
666 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
667 /* Get tx_offload for ol2, ol3, l2, l3 lengths */
669 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
670 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
673 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
674 : [a] "+w"(senddesc01_w1)
675 : [in] "r"(mbuf0 + 2)
678 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
679 : [a] "+w"(senddesc01_w1)
680 : [in] "r"(mbuf1 + 2)
683 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
684 : [b] "+w"(senddesc23_w1)
685 : [in] "r"(mbuf2 + 2)
688 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
689 : [b] "+w"(senddesc23_w1)
690 : [in] "r"(mbuf3 + 2)
693 /* Get pool pointer alone */
694 mbuf0 = (uint64_t *)*mbuf0;
695 mbuf1 = (uint64_t *)*mbuf1;
696 mbuf2 = (uint64_t *)*mbuf2;
697 mbuf3 = (uint64_t *)*mbuf3;
699 /* Get pool pointer alone */
700 mbuf0 = (uint64_t *)*mbuf0;
701 mbuf1 = (uint64_t *)*mbuf1;
702 mbuf2 = (uint64_t *)*mbuf2;
703 mbuf3 = (uint64_t *)*mbuf3;
706 const uint8x16_t shuf_mask2 = {
707 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
708 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
710 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
711 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
713 /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
714 const uint64x2_t and_mask0 = {
719 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
720 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
721 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
722 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
725 * Pick only 16 bits of pktlen preset at bits 63:32
726 * and place them at bits 15:0.
728 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
729 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
731 /* Add pairwise to get dataoff + iova in sgdesc_w1 */
732 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
733 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
735 /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
736 * pktlen at 15:0 position.
738 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
739 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
740 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
741 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
743 /* Move mbuf to point to pool_id. */
744 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
745 offsetof(struct rte_mempool, pool_id));
746 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
747 offsetof(struct rte_mempool, pool_id));
748 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
749 offsetof(struct rte_mempool, pool_id));
750 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
751 offsetof(struct rte_mempool, pool_id));
753 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
754 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
756 * Lookup table to translate ol_flags to
757 * il3/il4 types. But we still use ol3/ol4 types in
758 * senddesc_w1 as only one header processing is enabled.
760 const uint8x16_t tbl = {
761 /* [0-15] = il4type:il3type */
762 0x04, /* none (IPv6 assumed) */
763 0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */
764 0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */
765 0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */
766 0x03, /* PKT_TX_IP_CKSUM */
767 0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */
768 0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */
769 0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */
770 0x02, /* PKT_TX_IPV4 */
771 0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */
772 0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */
773 0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */
774 0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */
775 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
778 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
781 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
786 /* Extract olflags to translate to iltypes */
787 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
788 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
791 * E(47):L3_LEN(9):L2_LEN(7+z)
792 * E(47):L3_LEN(9):L2_LEN(7+z)
794 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
795 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
797 /* Move OLFLAGS bits 55:52 to 51:48
798 * with zeros preprended on the byte and rest
801 xtmp128 = vshrq_n_u8(xtmp128, 4);
802 ytmp128 = vshrq_n_u8(ytmp128, 4);
804 * E(48):L3_LEN(8):L2_LEN(z+7)
805 * E(48):L3_LEN(8):L2_LEN(z+7)
807 const int8x16_t tshft3 = {
808 -1, 0, 8, 8, 8, 8, 8, 8,
809 -1, 0, 8, 8, 8, 8, 8, 8,
812 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
813 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
816 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
817 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
819 /* Pick only relevant fields i.e Bit 48:55 of iltype
820 * and place it in ol3/ol4type of senddesc_w1
822 const uint8x16_t shuf_mask0 = {
823 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
824 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
827 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
828 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
830 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
831 * a [E(32):E(16):OL3(8):OL2(8)]
833 * a [E(32):E(16):(OL3+OL2):OL2]
834 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
836 senddesc01_w1 = vaddq_u8(senddesc01_w1,
837 vshlq_n_u16(senddesc01_w1, 8));
838 senddesc23_w1 = vaddq_u8(senddesc23_w1,
839 vshlq_n_u16(senddesc23_w1, 8));
841 /* Move ltypes to senddesc*_w1 */
842 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
843 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
844 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
845 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
847 * Lookup table to translate ol_flags to
851 const uint8x16_t tbl = {
852 /* [0-15] = ol4type:ol3type */
854 0x03, /* OUTER_IP_CKSUM */
855 0x02, /* OUTER_IPV4 */
856 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
857 0x04, /* OUTER_IPV6 */
858 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
859 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
860 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
863 0x00, /* OUTER_UDP_CKSUM */
864 0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
865 0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
866 0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
869 0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
870 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
873 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
876 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
877 * OUTER_IPV4 | OUTER_IP_CKSUM
881 /* Extract olflags to translate to iltypes */
882 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
883 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
886 * E(47):OL3_LEN(9):OL2_LEN(7+z)
887 * E(47):OL3_LEN(9):OL2_LEN(7+z)
889 const uint8x16_t shuf_mask5 = {
890 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
891 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
893 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
894 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
896 /* Extract outer ol flags only */
897 const uint64x2_t o_cksum_mask = {
902 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
903 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
905 /* Extract OUTER_UDP_CKSUM bit 41 and
909 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
910 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
912 /* Shift oltype by 2 to start nibble from BIT(56)
915 xtmp128 = vshrq_n_u8(xtmp128, 2);
916 ytmp128 = vshrq_n_u8(ytmp128, 2);
918 * E(48):L3_LEN(8):L2_LEN(z+7)
919 * E(48):L3_LEN(8):L2_LEN(z+7)
921 const int8x16_t tshft3 = {
922 -1, 0, 8, 8, 8, 8, 8, 8,
923 -1, 0, 8, 8, 8, 8, 8, 8,
926 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
927 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
930 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
931 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
933 /* Pick only relevant fields i.e Bit 56:63 of oltype
934 * and place it in ol3/ol4type of senddesc_w1
936 const uint8x16_t shuf_mask0 = {
937 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
938 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
941 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
942 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
944 /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
945 * a [E(32):E(16):OL3(8):OL2(8)]
947 * a [E(32):E(16):(OL3+OL2):OL2]
948 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
950 senddesc01_w1 = vaddq_u8(senddesc01_w1,
951 vshlq_n_u16(senddesc01_w1, 8));
952 senddesc23_w1 = vaddq_u8(senddesc23_w1,
953 vshlq_n_u16(senddesc23_w1, 8));
955 /* Move ltypes to senddesc*_w1 */
956 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
957 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
958 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
959 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
960 /* Lookup table to translate ol_flags to
961 * ol4type, ol3type, il4type, il3type of senddesc_w1
963 const uint8x16x2_t tbl = {{
965 /* [0-15] = il4type:il3type */
966 0x04, /* none (IPv6) */
967 0x14, /* PKT_TX_TCP_CKSUM (IPv6) */
968 0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */
969 0x34, /* PKT_TX_UDP_CKSUM (IPv6) */
970 0x03, /* PKT_TX_IP_CKSUM */
971 0x13, /* PKT_TX_IP_CKSUM |
974 0x23, /* PKT_TX_IP_CKSUM |
977 0x33, /* PKT_TX_IP_CKSUM |
980 0x02, /* PKT_TX_IPV4 */
981 0x12, /* PKT_TX_IPV4 |
984 0x22, /* PKT_TX_IPV4 |
987 0x32, /* PKT_TX_IPV4 |
990 0x03, /* PKT_TX_IPV4 |
993 0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
996 0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
999 0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
1005 /* [16-31] = ol4type:ol3type */
1007 0x03, /* OUTER_IP_CKSUM */
1008 0x02, /* OUTER_IPV4 */
1009 0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1010 0x04, /* OUTER_IPV6 */
1011 0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1012 0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1013 0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1016 0x00, /* OUTER_UDP_CKSUM */
1017 0x33, /* OUTER_UDP_CKSUM |
1020 0x32, /* OUTER_UDP_CKSUM |
1023 0x33, /* OUTER_UDP_CKSUM |
1024 * OUTER_IPV4 | OUTER_IP_CKSUM
1026 0x34, /* OUTER_UDP_CKSUM |
1029 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1032 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1035 0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1036 * OUTER_IPV4 | OUTER_IP_CKSUM
1041 /* Extract olflags to translate to oltype & iltype */
1042 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1043 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1046 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1047 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1049 const uint32x4_t tshft_4 = {
1055 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1056 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1059 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1060 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1062 const uint8x16_t shuf_mask5 = {
1063 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1064 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1066 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1067 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1069 /* Extract outer and inner header ol_flags */
1070 const uint64x2_t oi_cksum_mask = {
1075 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1076 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1078 /* Extract OUTER_UDP_CKSUM bit 41 and
1082 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1083 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1085 /* Shift right oltype by 2 and iltype by 4
1086 * to start oltype nibble from BIT(58)
1087 * instead of BIT(56) and iltype nibble from BIT(48)
1088 * instead of BIT(52).
1090 const int8x16_t tshft5 = {
1091 8, 8, 8, 8, 8, 8, -4, -2,
1092 8, 8, 8, 8, 8, 8, -4, -2,
1095 xtmp128 = vshlq_u8(xtmp128, tshft5);
1096 ytmp128 = vshlq_u8(ytmp128, tshft5);
1098 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1099 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1101 const int8x16_t tshft3 = {
1102 -1, 0, -1, 0, 0, 0, 0, 0,
1103 -1, 0, -1, 0, 0, 0, 0, 0,
1106 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1107 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1109 /* Mark Bit(4) of oltype */
1110 const uint64x2_t oi_cksum_mask2 = {
1115 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1116 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1119 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1120 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1122 /* Pick only relevant fields i.e Bit 48:55 of iltype and
1123 * Bit 56:63 of oltype and place it in corresponding
1124 * place in senddesc_w1.
1126 const uint8x16_t shuf_mask0 = {
1127 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1128 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1131 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1132 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1134 /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1135 * l3len, l2len, ol3len, ol2len.
1136 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1138 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1140 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1141 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1143 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1144 vshlq_n_u32(senddesc01_w1, 8));
1145 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1146 vshlq_n_u32(senddesc23_w1, 8));
1148 /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1149 senddesc01_w1 = vaddq_u8(
1150 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1151 senddesc23_w1 = vaddq_u8(
1152 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
1154 /* Move ltypes to senddesc*_w1 */
1155 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1156 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1159 xmask01 = vdupq_n_u64(0);
1161 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1166 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1171 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1176 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1180 xmask01 = vshlq_n_u64(xmask01, 20);
1181 xmask23 = vshlq_n_u64(xmask23, 20);
1183 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1184 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1186 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1187 /* Tx ol_flag for vlan. */
1188 const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
1189 /* Bit enable for VLAN1 */
1190 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1191 /* Tx ol_flag for QnQ. */
1192 const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
1193 /* Bit enable for VLAN0 */
1194 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1195 /* Load vlan values from packet. outer is VLAN 0 */
1196 uint64x2_t ext01 = {
1197 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1198 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1199 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1200 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1202 uint64x2_t ext23 = {
1203 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1204 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1205 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1206 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1209 /* Get ol_flags of the packets. */
1210 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1211 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1213 /* ORR vlan outer/inner values into cmd. */
1214 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1215 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1217 /* Test for offload enable bits and generate masks. */
1218 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1220 vandq_u64(vtstq_u64(xtmp128, olq),
1222 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1224 vandq_u64(vtstq_u64(ytmp128, olq),
1227 /* Set vlan enable bits into cmd based on mask. */
1228 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1229 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1232 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1233 /* Set don't free bit if reference count > 1 */
1234 xmask01 = vdupq_n_u64(0);
1237 /* Move mbufs to iova */
1238 mbuf0 = (uint64_t *)tx_pkts[0];
1239 mbuf1 = (uint64_t *)tx_pkts[1];
1240 mbuf2 = (uint64_t *)tx_pkts[2];
1241 mbuf3 = (uint64_t *)tx_pkts[3];
1243 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
1244 vsetq_lane_u64(0x80000, xmask01, 0);
1246 __mempool_check_cookies(
1247 ((struct rte_mbuf *)mbuf0)->pool,
1248 (void **)&mbuf0, 1, 0);
1250 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
1251 vsetq_lane_u64(0x80000, xmask01, 1);
1253 __mempool_check_cookies(
1254 ((struct rte_mbuf *)mbuf1)->pool,
1255 (void **)&mbuf1, 1, 0);
1257 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
1258 vsetq_lane_u64(0x80000, xmask23, 0);
1260 __mempool_check_cookies(
1261 ((struct rte_mbuf *)mbuf2)->pool,
1262 (void **)&mbuf2, 1, 0);
1264 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
1265 vsetq_lane_u64(0x80000, xmask23, 1);
1267 __mempool_check_cookies(
1268 ((struct rte_mbuf *)mbuf3)->pool,
1269 (void **)&mbuf3, 1, 0);
1270 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1271 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1272 /* Ensuring mbuf fields which got updated in
1273 * cnxk_nix_prefree_seg are written before LMTST.
1277 /* Move mbufs to iova */
1278 mbuf0 = (uint64_t *)tx_pkts[0];
1279 mbuf1 = (uint64_t *)tx_pkts[1];
1280 mbuf2 = (uint64_t *)tx_pkts[2];
1281 mbuf3 = (uint64_t *)tx_pkts[3];
1283 /* Mark mempool object as "put" since
1284 * it is freed by NIX
1286 __mempool_check_cookies(
1287 ((struct rte_mbuf *)mbuf0)->pool,
1288 (void **)&mbuf0, 1, 0);
1290 __mempool_check_cookies(
1291 ((struct rte_mbuf *)mbuf1)->pool,
1292 (void **)&mbuf1, 1, 0);
1294 __mempool_check_cookies(
1295 ((struct rte_mbuf *)mbuf2)->pool,
1296 (void **)&mbuf2, 1, 0);
1298 __mempool_check_cookies(
1299 ((struct rte_mbuf *)mbuf3)->pool,
1300 (void **)&mbuf3, 1, 0);
1301 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1306 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1307 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1308 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1309 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
1310 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
1312 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
1313 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
1314 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
1315 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
1317 if (flags & NIX_TX_NEED_EXT_HDR) {
1318 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
1319 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
1320 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
1321 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
1324 if (flags & NIX_TX_NEED_EXT_HDR) {
1325 /* With ext header in the command we can no longer send
1326 * all 4 packets together since LMTLINE is 128bytes.
1327 * Split and Tx twice.
1330 vst1q_u64(lmt_addr, cmd0[0]);
1331 vst1q_u64(lmt_addr + 2, cmd2[0]);
1332 vst1q_u64(lmt_addr + 4, cmd1[0]);
1333 vst1q_u64(lmt_addr + 6, cmd0[1]);
1334 vst1q_u64(lmt_addr + 8, cmd2[1]);
1335 vst1q_u64(lmt_addr + 10, cmd1[1]);
1336 lmt_status = roc_lmt_submit_ldeor(io_addr);
1337 } while (lmt_status == 0);
1340 vst1q_u64(lmt_addr, cmd0[2]);
1341 vst1q_u64(lmt_addr + 2, cmd2[2]);
1342 vst1q_u64(lmt_addr + 4, cmd1[2]);
1343 vst1q_u64(lmt_addr + 6, cmd0[3]);
1344 vst1q_u64(lmt_addr + 8, cmd2[3]);
1345 vst1q_u64(lmt_addr + 10, cmd1[3]);
1346 lmt_status = roc_lmt_submit_ldeor(io_addr);
1347 } while (lmt_status == 0);
1350 vst1q_u64(lmt_addr, cmd0[0]);
1351 vst1q_u64(lmt_addr + 2, cmd1[0]);
1352 vst1q_u64(lmt_addr + 4, cmd0[1]);
1353 vst1q_u64(lmt_addr + 6, cmd1[1]);
1354 vst1q_u64(lmt_addr + 8, cmd0[2]);
1355 vst1q_u64(lmt_addr + 10, cmd1[2]);
1356 vst1q_u64(lmt_addr + 12, cmd0[3]);
1357 vst1q_u64(lmt_addr + 14, cmd1[3]);
1358 lmt_status = roc_lmt_submit_ldeor(io_addr);
1359 } while (lmt_status == 0);
1361 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
1364 if (unlikely(pkts_left))
1365 pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
/* Stub for builds without the NEON vector Tx implementation.
 * Keeps the cn9k_nix_xmit_pkts_vector() signature identical to the
 * aarch64 variant so callers compile unchanged; transmits nothing.
 * RTE_SET_USED() silences unused-parameter warnings.
 * NOTE(review): the remaining RTE_SET_USED()s for pkts/cmd, the
 * `return 0;` and the closing brace are elided from this view of the
 * file — confirm against the full source.
 */
static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(flags);
/* Short aliases for the NIX_TX_OFFLOAD_* flag bits, used only to keep
 * the NIX_TX_FASTPATH_MODES table below readable.
 */
#define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F NIX_TX_OFFLOAD_TSO_F
#define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
/* [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
/* X-macro table enumerating every supported combination of Tx offload
 * flags. Each T(name, f5, f4, f3, f2, f1, f0, sz, flags) entry gives:
 *   name  - suffix used to build the generated function names
 *   f5-f0 - selector bits in the order shown above (TSP..L3L4CSUM)
 *   sz    - per-packet command size: 4 base, 6 when an extension
 *           header is needed (VLAN/TSO), 8 with timestamp.
 *           NOTE(review): presumably in 64-bit words — confirm against
 *           the cmd[] sizing at the callers.
 *   flags - OR of the NIX_TX_OFFLOAD_* bits the mode enables
 * NOTE(review): several continuation lines (flags arguments of some
 * entries) are elided from this view of the file.
 */
#define NIX_TX_FASTPATH_MODES \
T(no_offload, 0, 0, 0, 0, 0, 0, 4, \
	NIX_TX_OFFLOAD_NONE) \
T(l3l4csum, 0, 0, 0, 0, 0, 1, 4, \
T(ol3ol4csum, 0, 0, 0, 0, 1, 0, 4, \
T(ol3ol4csum_l3l4csum, 0, 0, 0, 0, 1, 1, 4, \
	OL3OL4CSUM_F | L3L4CSUM_F) \
T(vlan, 0, 0, 0, 1, 0, 0, 6, \
T(vlan_l3l4csum, 0, 0, 0, 1, 0, 1, 6, \
	VLAN_F | L3L4CSUM_F) \
T(vlan_ol3ol4csum, 0, 0, 0, 1, 1, 0, 6, \
	VLAN_F | OL3OL4CSUM_F) \
T(vlan_ol3ol4csum_l3l4csum, 0, 0, 0, 1, 1, 1, 6, \
	VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(noff, 0, 0, 1, 0, 0, 0, 4, \
T(noff_l3l4csum, 0, 0, 1, 0, 0, 1, 4, \
	NOFF_F | L3L4CSUM_F) \
T(noff_ol3ol4csum, 0, 0, 1, 0, 1, 0, 4, \
	NOFF_F | OL3OL4CSUM_F) \
T(noff_ol3ol4csum_l3l4csum, 0, 0, 1, 0, 1, 1, 4, \
	NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(noff_vlan, 0, 0, 1, 1, 0, 0, 6, \
T(noff_vlan_l3l4csum, 0, 0, 1, 1, 0, 1, 6, \
	NOFF_F | VLAN_F | L3L4CSUM_F) \
T(noff_vlan_ol3ol4csum, 0, 0, 1, 1, 1, 0, 6, \
	NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(noff_vlan_ol3ol4csum_l3l4csum, 0, 0, 1, 1, 1, 1, 6, \
	NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso, 0, 1, 0, 0, 0, 0, 6, \
T(tso_l3l4csum, 0, 1, 0, 0, 0, 1, 6, \
	TSO_F | L3L4CSUM_F) \
T(tso_ol3ol4csum, 0, 1, 0, 0, 1, 0, 6, \
	TSO_F | OL3OL4CSUM_F) \
T(tso_ol3ol4csum_l3l4csum, 0, 1, 0, 0, 1, 1, 6, \
	TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso_vlan, 0, 1, 0, 1, 0, 0, 6, \
T(tso_vlan_l3l4csum, 0, 1, 0, 1, 0, 1, 6, \
	TSO_F | VLAN_F | L3L4CSUM_F) \
T(tso_vlan_ol3ol4csum, 0, 1, 0, 1, 1, 0, 6, \
	TSO_F | VLAN_F | OL3OL4CSUM_F) \
T(tso_vlan_ol3ol4csum_l3l4csum, 0, 1, 0, 1, 1, 1, 6, \
	TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso_noff, 0, 1, 1, 0, 0, 0, 6, \
T(tso_noff_l3l4csum, 0, 1, 1, 0, 0, 1, 6, \
	TSO_F | NOFF_F | L3L4CSUM_F) \
T(tso_noff_ol3ol4csum, 0, 1, 1, 0, 1, 0, 6, \
	TSO_F | NOFF_F | OL3OL4CSUM_F) \
T(tso_noff_ol3ol4csum_l3l4csum, 0, 1, 1, 0, 1, 1, 6, \
	TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(tso_noff_vlan, 0, 1, 1, 1, 0, 0, 6, \
	TSO_F | NOFF_F | VLAN_F) \
T(tso_noff_vlan_l3l4csum, 0, 1, 1, 1, 0, 1, 6, \
	TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(tso_noff_vlan_ol3ol4csum, 0, 1, 1, 1, 1, 0, 6, \
	TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(tso_noff_vlan_ol3ol4csum_l3l4csum, 0, 1, 1, 1, 1, 1, 6, \
	TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts, 1, 0, 0, 0, 0, 0, 8, \
T(ts_l3l4csum, 1, 0, 0, 0, 0, 1, 8, \
	TSP_F | L3L4CSUM_F) \
T(ts_ol3ol4csum, 1, 0, 0, 0, 1, 0, 8, \
	TSP_F | OL3OL4CSUM_F) \
T(ts_ol3ol4csum_l3l4csum, 1, 0, 0, 0, 1, 1, 8, \
	TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_vlan, 1, 0, 0, 1, 0, 0, 8, \
T(ts_vlan_l3l4csum, 1, 0, 0, 1, 0, 1, 8, \
	TSP_F | VLAN_F | L3L4CSUM_F) \
T(ts_vlan_ol3ol4csum, 1, 0, 0, 1, 1, 0, 8, \
	TSP_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_vlan_ol3ol4csum_l3l4csum, 1, 0, 0, 1, 1, 1, 8, \
	TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_noff, 1, 0, 1, 0, 0, 0, 8, \
T(ts_noff_l3l4csum, 1, 0, 1, 0, 0, 1, 8, \
	TSP_F | NOFF_F | L3L4CSUM_F) \
T(ts_noff_ol3ol4csum, 1, 0, 1, 0, 1, 0, 8, \
	TSP_F | NOFF_F | OL3OL4CSUM_F) \
T(ts_noff_ol3ol4csum_l3l4csum, 1, 0, 1, 0, 1, 1, 8, \
	TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_noff_vlan, 1, 0, 1, 1, 0, 0, 8, \
	TSP_F | NOFF_F | VLAN_F) \
T(ts_noff_vlan_l3l4csum, 1, 0, 1, 1, 0, 1, 8, \
	TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(ts_noff_vlan_ol3ol4csum, 1, 0, 1, 1, 1, 0, 8, \
	TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_noff_vlan_ol3ol4csum_l3l4csum, 1, 0, 1, 1, 1, 1, 8, \
	TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso, 1, 1, 0, 0, 0, 0, 8, \
T(ts_tso_l3l4csum, 1, 1, 0, 0, 0, 1, 8, \
	TSP_F | TSO_F | L3L4CSUM_F) \
T(ts_tso_ol3ol4csum, 1, 1, 0, 0, 1, 0, 8, \
	TSP_F | TSO_F | OL3OL4CSUM_F) \
T(ts_tso_ol3ol4csum_l3l4csum, 1, 1, 0, 0, 1, 1, 8, \
	TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso_vlan, 1, 1, 0, 1, 0, 0, 8, \
	TSP_F | TSO_F | VLAN_F) \
T(ts_tso_vlan_l3l4csum, 1, 1, 0, 1, 0, 1, 8, \
	TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \
T(ts_tso_vlan_ol3ol4csum, 1, 1, 0, 1, 1, 0, 8, \
	TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_tso_vlan_ol3ol4csum_l3l4csum, 1, 1, 0, 1, 1, 1, 8, \
	TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso_noff, 1, 1, 1, 0, 0, 0, 8, \
	TSP_F | TSO_F | NOFF_F) \
T(ts_tso_noff_l3l4csum, 1, 1, 1, 0, 0, 1, 8, \
	TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \
T(ts_tso_noff_ol3ol4csum, 1, 1, 1, 0, 1, 0, 8, \
	TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
T(ts_tso_noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 1, 1, 8, \
	TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
T(ts_tso_noff_vlan, 1, 1, 1, 1, 0, 0, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F) \
T(ts_tso_noff_vlan_l3l4csum, 1, 1, 1, 1, 0, 1, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
T(ts_tso_noff_vlan_ol3ol4csum, 1, 1, 1, 1, 1, 0, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 1, 1, 1, 1, 1, 1, 8, \
	TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
/* For each fastpath mode in the table above, declare the three
 * generated Tx entry points: scalar (cn9k_nix_xmit_pkts_<name>),
 * multi-segment (cn9k_nix_xmit_pkts_mseg_<name>) and vector
 * (cn9k_nix_xmit_pkts_vec_<name>). Only declarations are emitted
 * here; the definitions live elsewhere.
 */
#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \
	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

/* Expand the declarations for every mode. */
NIX_TX_FASTPATH_MODES
/* NOTE(review): the matching `#undef T` appears to be elided from this
 * view of the file -- confirm it precedes the #endif in full source.
 */

#endif /* __CN9K_TX_H__ */