/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)

/* Flags to control xmit_prepare function.
 * Defined from the MSB end so that it is not
 * confused with the offload flags used to pick the Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |        \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
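
/* Check for available SQB room before enqueuing 'pkts' packets: the cached
 * credit count is refreshed from the shared fc_mem word when it runs low,
 * and the enclosing Tx burst function returns 0 if there is still no room.
 */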
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached value is low, update the fc_cache_pkts */            \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Multiply with sqes_per_sqb to express in pkts */    \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)

/* Function to determine the number of Tx sub-descriptors required when the
 * ext sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags &
		(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ? 1 : 0;
}
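
/* Pack the STEOR data word used when flushing LMT lines: the descriptor size
 * per line, as a count of 16B units minus one (dw_m1), is replicated for up
 * to 16 lines; the copy kept in the low bits is folded into the I/O address
 * by cn10k_nix_xmit_pkts().
 */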
static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
	const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
	/* This will be moved to addr area */
	uint64_t data = dw_m1;
	uint8_t i;

	/* 15 vector sizes for single seg */
	for (i = 0; i < 15; i++)
		data |= dw_m1 << (19 + i * 3);

	return data;
}
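
/* Pre-build the per-queue constant part of the Tx command (SEND_HDR, the
 * optional SEND_EXT template from txq->cmd and the SEND_SG header) into
 * 'cmd'; the per-packet fields are patched in by cn10k_nix_xmit_prepare().
 */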
static __rte_always_inline void
cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags)
{
	/* Send hdr */
	cmd[0] = txq->send_hdr_w0;
	cmd[1] = 0;
	cmd += 2;

	/* Send ext if present */
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
		cmd += 2;
	}

	/* Send sg */
	cmd[0] = txq->sg_w0;
	cmd[1] = 0;
}
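
/* For TSO, trim the payload length from the IP total-length field (and from
 * the outer IP/UDP lengths for tunnel TSO) so that the per-segment lengths
 * inserted by the HW LSO engine come out correct.
 */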
static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & PKT_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       PKT_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata +
						       m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & PKT_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
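
/* Fill the per-packet fields of the Tx command (send header, optional ext
 * header and SG) based on the compile-time 'flags' and the mbuf ol_flags,
 * then copy the finished descriptor into the packet's LMT line at lmt_addr.
 */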
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
		       const uint16_t flags, const uint64_t lso_tun_fmt)
{
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w1.u = 0;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		send_hdr->w0.total = m->data_len;
		send_hdr->w0.aura =
			roc_npa_aura_handle_to_aura(m->pool->pool_id);
	}

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */
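
	/* The three checksum branches below are mutually exclusive; since
	 * 'flags' is a compile-time constant in the fast-path variants, only
	 * one of them survives in each generated function.
	 */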
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & PKT_TX_L4_MASK) >> 52;

		/* If no outer L3 header is present, shift the IL3/IL4
		 * fields down so that OL3/OL4 are used for the header
		 * checksum.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & PKT_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & PKT_TX_IPV4)) << 1) +
			     ((!!(ol_flags & PKT_TX_IPV6)) << 2) +
			     !!(ol_flags & PKT_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & PKT_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & PKT_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & PKT_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
	}
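
	/* For TSO, program the header length preceding the payload (lso_sb),
	 * the MSS (lso_mps) and the LSO format index; tunnel TSO selects its
	 * format index from the packed lso_tun_fmt word instead.
	 */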
	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & PKT_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 * mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
	}

	/* With minimal offloads, 'cmd' being local could be optimized out to
	 * registers. In other cases, 'cmd' will be in stack. Intent is
	 * 'cmd' stores content from txq->cmd which is copied only once.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *send_hdr;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;
	}
	/* In case of multi-seg, sg template is stored here */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}
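
/* Burst Tx: one LMT line is built per packet from the 'cmd' skeleton and
 * bursts of up to 32 packets are flushed to the NIX SQ with LMTST (steorl)
 * operations; flow-control credits are checked once per call.
 */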
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		    uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uintptr_t pa, lmt_addr = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uint64_t lso_tun_fmt;
	uint64_t data;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	/* Get cmd skeleton */
	cn10k_nix_tx_skeleton(txq, cmd, flags);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	/* Get LMT base address and LMT ID as lcore id */
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	left = pkts;
again:
	burst = left > 32 ? 32 : left;
	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO, barrier at
		 * lmt steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
				       lso_tun_fmt);
		lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
	}
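
	/* Trigger LMTST: a single steorl can flush at most 16 LMT lines, so a
	 * 32-packet burst is split into two submissions. The first line's
	 * size (from the steor data word) is carried in bits [6:4] of the I/O
	 * address, the number of lines minus one starts at bit 12 of the data
	 * word, and the LMT ID sits in its low bits.
	 */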
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);
	}

	left -= burst;
	rte_io_wmb();
	if (left) {
		/* Start processing another burst */
		tx_pkts += burst;
		/* Reset lmt base addr */
		lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
		lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
		goto again;
	}

	return pkts;
}

#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F	     NIX_TX_OFFLOAD_TSO_F
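
/* Each T() entry below lists: function name suffix, the five flag bits
 * [TSO][NOFF][VLAN][OL3OL4CSUM][L3L4CSUM], the Tx command size in 64-bit
 * words (4 without the ext header, 6 with it), and the combined offload
 * flags. The T macro after the table expands every entry into a declaration
 * of the corresponding cn10k_nix_xmit_pkts_<name> fast-path function.
 */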
/* [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
#define NIX_TX_FASTPATH_MODES \
	T(no_offload,				0, 0, 0, 0, 0, 4, \
		NIX_TX_OFFLOAD_NONE) \
	T(l3l4csum,				0, 0, 0, 0, 1, 4, \
		L3L4CSUM_F) \
	T(ol3ol4csum,				0, 0, 0, 1, 0, 4, \
		OL3OL4CSUM_F) \
	T(ol3ol4csum_l3l4csum,			0, 0, 0, 1, 1, 4, \
		OL3OL4CSUM_F | L3L4CSUM_F) \
	T(vlan,					0, 0, 1, 0, 0, 6, \
		VLAN_F) \
	T(vlan_l3l4csum,			0, 0, 1, 0, 1, 6, \
		VLAN_F | L3L4CSUM_F) \
	T(vlan_ol3ol4csum,			0, 0, 1, 1, 0, 6, \
		VLAN_F | OL3OL4CSUM_F) \
	T(vlan_ol3ol4csum_l3l4csum,		0, 0, 1, 1, 1, 6, \
		VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(noff,					0, 1, 0, 0, 0, 4, \
		NOFF_F) \
	T(noff_l3l4csum,			0, 1, 0, 0, 1, 4, \
		NOFF_F | L3L4CSUM_F) \
	T(noff_ol3ol4csum,			0, 1, 0, 1, 0, 4, \
		NOFF_F | OL3OL4CSUM_F) \
	T(noff_ol3ol4csum_l3l4csum,		0, 1, 0, 1, 1, 4, \
		NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(noff_vlan,				0, 1, 1, 0, 0, 6, \
		NOFF_F | VLAN_F) \
	T(noff_vlan_l3l4csum,			0, 1, 1, 0, 1, 6, \
		NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(noff_vlan_ol3ol4csum,			0, 1, 1, 1, 0, 6, \
		NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(noff_vlan_ol3ol4csum_l3l4csum,	0, 1, 1, 1, 1, 6, \
		NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso,					1, 0, 0, 0, 0, 6, \
		TSO_F) \
	T(tso_l3l4csum,				1, 0, 0, 0, 1, 6, \
		TSO_F | L3L4CSUM_F) \
	T(tso_ol3ol4csum,			1, 0, 0, 1, 0, 6, \
		TSO_F | OL3OL4CSUM_F) \
	T(tso_ol3ol4csum_l3l4csum,		1, 0, 0, 1, 1, 6, \
		TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_vlan,				1, 0, 1, 0, 0, 6, \
		TSO_F | VLAN_F) \
	T(tso_vlan_l3l4csum,			1, 0, 1, 0, 1, 6, \
		TSO_F | VLAN_F | L3L4CSUM_F) \
	T(tso_vlan_ol3ol4csum,			1, 0, 1, 1, 0, 6, \
		TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_vlan_ol3ol4csum_l3l4csum,		1, 0, 1, 1, 1, 6, \
		TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_noff,				1, 1, 0, 0, 0, 6, \
		TSO_F | NOFF_F) \
	T(tso_noff_l3l4csum,			1, 1, 0, 0, 1, 6, \
		TSO_F | NOFF_F | L3L4CSUM_F) \
	T(tso_noff_ol3ol4csum,			1, 1, 0, 1, 0, 6, \
		TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(tso_noff_ol3ol4csum_l3l4csum,		1, 1, 0, 1, 1, 6, \
		TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \
	T(tso_noff_vlan,			1, 1, 1, 0, 0, 6, \
		TSO_F | NOFF_F | VLAN_F) \
	T(tso_noff_vlan_l3l4csum,		1, 1, 1, 0, 1, 6, \
		TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum,		1, 1, 1, 1, 0, 6, \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 6, \
		TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)

#define T(name, f4, f3, f2, f1, f0, sz, flags)                                 \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T

#endif /* __CN10K_TX_H__ */