/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */
#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * In order to support the mbufs with external attached
				 * data buffer we should use the buf_addr pointer
				 * instead of rte_mbuf_buf_addr(). It touches the mbuf
				 * itself and may impact the performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}
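
/*
 * Worked example (illustration only, not used by the driver): the ring math
 * above relies on power-of-two sizes. With a hypothetical rxq->elts_n of 8,
 * the ring holds q_n = 1 << 8 = 256 descriptors and q_mask = 0xff, so the
 * producer index wraps without a modulo:
 *
 *	uint16_t rq_ci = 300;
 *	uint16_t elts_idx = rq_ci & 0xff;	// 44, wrapped ring slot
 *
 * Replenishment runs only once at least rq_repl_thresh descriptors are free,
 * and MLX5_VPMD_DESCS_PER_LOOP slots are held back and pointed at fake_mbuf
 * so the vectorized loop can read past the tail without touching consumed
 * mbufs.
 */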

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}
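
/*
 * Sizing sketch (illustration only): for MPRQ the SW ring is dimensioned in
 * strides rather than WQEs. With hypothetical values rxq->elts_n = 4 and
 * rxq->strd_num_n = 6, the function above sees:
 *
 *	wqe_n  = 1 << 4 = 16 multi-packet WQEs
 *	strd_n = 1 << 6 = 64 strides per WQE
 *	elts_n = 16 * 64 = 1024 SW ring entries (one mbuf per stride)
 *
 * which is why the replenish count and masks are computed against
 * elts_ci/rq_pi here instead of rq_ci as in the non-MPRQ variant.
 */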

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = 1 << rxq->strd_sz_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}
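
/*
 * Stride accounting sketch (illustration only): the strd_cnt computation
 * above is a ceiling division. For a hypothetical 2000-byte packet and a
 * 512-byte stride (rxq->strd_sz_n = 9):
 *
 *	strd_cnt = (2000 / 512) + ((2000 % 512) ? 1 : 0) = 3 + 1 = 4
 *
 * so four strides of the current mlx5_mprq_buf are consumed before the mbuf
 * is handed to the application (copied or attached by mprq_buf_to_pkt()).
 */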

/**
 * Receive burst of packets. An errored completion also consumes a mbuf, but the
 * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}
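
/*
 * Burst clamping sketch (illustration only): before rxq_cq_process_v() runs,
 * the burst size is rounded down to a multiple of MLX5_VPMD_DESCS_PER_LOOP
 * and clipped at the queue wrap. For a hypothetical request of 37 packets
 * with 4 descriptors per SIMD loop and 20 entries left before the ring end:
 *
 *	pkts_n = RTE_ALIGN_FLOOR(37, 4);	// 36
 *	pkts_n = RTE_MIN(pkts_n, 20);		// 20
 *
 * Anything not delivered here is either picked up on the next call or served
 * from rxq->decompressed (pending entries of a compressed CQE session).
 */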

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}
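
/*
 * Usage sketch (illustration only): applications do not call
 * mlx5_rx_burst_vec() directly; it is installed as the Rx burst callback
 * when the vector path is selected and is reached through the generic
 * ethdev API. A minimal polling loop, assuming port 0/queue 0 are already
 * configured and started and the usual rte_ethdev.h/rte_mbuf.h setup:
 *
 *	struct rte_mbuf *burst[32];
 *	uint16_t nb = rte_eth_rx_burst(0, 0, burst, RTE_DIM(burst));
 *	uint16_t j;
 *
 *	for (j = 0; j < nb; ++j)
 *		rte_pktmbuf_free(burst[j]);	// application processing here
 */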

/**
 * Receive burst of packets. An errored completion also consumes a mbuf, but the
 * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check a RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (!rxq)
			continue;
		if (mlx5_rxq_check_vec_support(rxq) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}
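
/*
 * Note (illustration only): rte_vect_get_max_simd_bitwidth() reflects the
 * EAL setting, so the vector Rx path can be disabled at runtime without a
 * rebuild by capping the SIMD width below 128 bits, e.g. (hypothetical
 * command line):
 *
 *	dpdk-testpmd --force-max-simd-bitwidth=64 -- --rxq=4 --txq=4
 *
 * In that case the check above returns -ENOTSUP and the PMD falls back to
 * the scalar burst functions.
 */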