/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>

#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);

static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe);

mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type);

mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len);
 * Internal function to compute the number of used descriptors in an RX queue.
 *   The number of used Rx descriptors.
rx_queue_count(struct mlx5_rxq_data *rxq)
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int sges_n = (1 << rxq->sges_n);
	const unsigned int elts_n = (1 << rxq->elts_n);
	const unsigned int strd_n = (1 << rxq->strd_num_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci, used;

	/* If we are processing a compressed CQE. */
		used = zip->cqe_cnt - zip->ai;
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	used = RTE_MIN(used * sges_n, elts_n * strd_n);
 * DPDK callback to check the status of an Rx descriptor.
 *   The index of the descriptor in the ring.
 *   The status of the Rx descriptor.
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
	if (offset >= (1 << rxq->cqe_n)) {
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
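
/*
 * Illustrative application-level use of the descriptor status callback,
 * reached through the generic ethdev API (port_id, queue_id and offset are
 * placeholders, not values taken from this file):
 *
 *	int st = rte_eth_rx_descriptor_status(port_id, queue_id, offset);
 *
 *	if (st == RTE_ETH_RX_DESC_DONE)
 *		;	// a received packet is waiting at this offset
 *	else if (st == RTE_ETH_RX_DESC_AVAIL)
 *		;	// the descriptor is free for the HW to fill
 *	else if (st < 0)
 *		;	// negative errno reported by the ethdev layer
 */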
 * DPDK callback to get the RX queue information.
 *   Pointer to the device structure.
 *   Rx queue identifier.
 *   Pointer to the RX queue information structure.
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
 * DPDK callback to get the RX packet burst mode information.
 *   Pointer to the device structure.
 *   Rx queue identifier.
 *   Pointer to the burst mode information.
 *   0 on success, -EINVAL on failure.
mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t rx_queue_id __rte_unused,
		       struct rte_eth_burst_mode *mode)
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	rxq = (*priv->rxqs)[rx_queue_id];
	if (pkt_burst == mlx5_rx_burst) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
	} else if (pkt_burst == mlx5_rx_burst_mprq) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
	} else if (pkt_burst == mlx5_rx_burst_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
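
/*
 * Illustrative query of the selected Rx burst mode through the generic
 * ethdev API (port_id and queue_id are placeholders):
 *
 *	struct rte_eth_burst_mode bm;
 *
 *	if (rte_eth_rx_burst_mode_get(port_id, queue_id, &bm) == 0)
 *		printf("Rx burst mode: %s\n", bm.info);
 */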
 * DPDK callback to get the number of used descriptors in an Rx queue.
 *   Pointer to the device structure.
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid.
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
	rxq = (*priv->rxqs)[rx_queue_id];
	return rx_queue_count(rxq);
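
/*
 * Illustrative use of the queue count callback through the generic ethdev
 * API (port_id and queue_id are placeholders); a negative return value is
 * an error code such as -EINVAL or -ENOTSUP:
 *
 *	int used = rte_eth_rx_queue_count(port_id, queue_id);
 *
 *	if (used >= 0)
 *		printf("%d Rx descriptors currently hold packets\n", used);
 */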
#define CLB_VAL_IDX 0
#define CLB_MSK_IDX 1
mlx5_monitor_callback(const uint64_t value,
		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
	const uint64_t m = opaque[CLB_MSK_IDX];
	const uint64_t v = opaque[CLB_VAL_IDX];

	return (value & m) == v ? -1 : 0;

int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
	struct mlx5_rxq_data *rxq = rx_queue;
	const unsigned int cqe_num = 1 << rxq->cqe_n;
	const unsigned int cqe_mask = cqe_num - 1;
	const uint16_t idx = rxq->cq_ci & cqe_num;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];

	if (unlikely(rxq->cqes == NULL)) {
	pmc->addr = &cqe->op_own;
	pmc->opaque[CLB_VAL_IDX] = !!idx;
	pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK;
	pmc->fn = mlx5_monitor_callback;
	pmc->size = sizeof(uint8_t);
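
/*
 * Illustrative wait-for-traffic sequence built on top of this callback,
 * using the generic power-management intrinsics (port_id, queue_id and
 * timeout_cycles are placeholders, error handling is omitted):
 *
 *	struct rte_power_monitor_cond pmc;
 *
 *	rte_eth_get_monitor_addr(port_id, queue_id, &pmc);
 *	// Sleep on the op_own byte of the next CQE until it changes or the
 *	// TSC deadline expires (UMWAIT on x86 or the architecture equivalent).
 *	rte_power_monitor(&pmc, rte_get_tsc_cycles() + timeout_cycles);
 */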
 * Translate RX completion flags to packet type.
 *   Pointer to RX queue structure.
 * @note: keep mlx5_dev_supported_ptypes_get() in sync with any change here.
 *   Packet type for struct rte_mbuf.
static inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe)
	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;

	/* Get the l3/l4 header type from the mini-CQE in case of L3/L4 format. */
	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
		ptype = mcqe->hdr_type >> 2;
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[7] = outer_l3_type
	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
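
/*
 * In the return expression above, mlx5_ptype_table[] maps the 8-bit index
 * built from the CQE fields to an RTE_PTYPE_* combination, and multiplying
 * rxq->tunnel by !!(idx & (1 << 6)) ORs the tunnel packet type in only when
 * the "tunneled" bit of the index is set, avoiding a branch on the hot path.
 */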
 * Initialize Rx WQ and indexes.
 *   Pointer to RX queue structure.
mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
	const unsigned int wqe_n = 1 << rxq->elts_n;

	for (i = 0; (i != wqe_n); ++i) {
		volatile struct mlx5_wqe_data_seg *scat;

		if (mlx5_rxq_mprq_enabled(rxq)) {
			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];

			scat = &((volatile struct mlx5_wqe_mprq *)
			addr = (uintptr_t)mlx5_mprq_buf_addr(buf,
						1 << rxq->strd_num_n);
			byte_count = (1 << rxq->strd_sz_n) *
					(1 << rxq->strd_num_n);
			struct rte_mbuf *buf = (*rxq->elts)[i];

			scat = &((volatile struct mlx5_wqe_data_seg *)
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			byte_count = DATA_LEN(buf);
		/* scat->addr must be able to store a pointer. */
		MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(addr),
			.byte_count = rte_cpu_to_be_32(byte_count),
			.lkey = mlx5_rx_addr2mr(rxq, addr),
	rxq->consumed_strd = 0;
	rxq->decompressed = 0;
	rxq->zip = (struct rxq_zip){
	rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
		(wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
	/* Update doorbell counter. */
	rxq->rq_ci = wqe_n >> rxq->sges_n;
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 * The function moves the RQ state to reset when the first error CQE is
 * seen, then the caller's loop drains the CQ. When the CQ is empty, the
 * function moves the RQ state to ready and reinitializes the RQ.
 * Identifying the next CQE and counting errors are the caller's responsibility.
 *   Pointer to RX queue structure.
 *   1 when called from a vectorized Rx burst, mbufs for the RQ must be prepared.
 *   0 when called from a non-vectorized Rx burst.
 *   -1 in case of recovery error, otherwise the CQE status.
mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
	const uint16_t cqe_n = 1 << rxq->cqe_n;
	const uint16_t cqe_mask = cqe_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t strd_n = 1 << rxq->strd_num_n;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
		volatile struct mlx5_cqe *cqe;
		volatile struct mlx5_err_cqe *err_cqe;
		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
	struct mlx5_mp_arg_queue_state_modify sm;

	switch (rxq->err_state) {
	case MLX5_RXQ_ERR_STATE_NO_ERROR:
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
	case MLX5_RXQ_ERR_STATE_NEED_RESET:
		sm.queue_id = rxq->idx;
		sm.state = IBV_WQS_RESET;
		if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
		if (rxq_ctrl->dump_file_n <
		    rxq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
			      rxq->cqn, rxq_ctrl->wqn,
			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
						    sizeof(*u.cqe) * cqe_n);
			mlx5_dump_debug_information(name, "MLX5 Error RQ:",
						    (const void *)((uintptr_t)
			rxq_ctrl->dump_file_n++;
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
	case MLX5_RXQ_ERR_STATE_NEED_READY:
		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_HW_OWN) {
			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
			 * The RQ consumer index must be zeroed while moving
			 * from RESET state to RDY state.
			*rxq->rq_db = rte_cpu_to_be_32(0);
			sm.queue_id = rxq->idx;
			sm.state = IBV_WQS_RDY;
			if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
				const uint32_t elts_n =
					mlx5_rxq_mprq_enabled(rxq) ?
					wqe_n * strd_n : wqe_n;
				const uint32_t e_mask = elts_n - 1;
					mlx5_rxq_mprq_enabled(rxq) ?
					rxq->elts_ci : rxq->rq_ci;
				struct rte_mbuf **elt;
				unsigned int n = elts_n - (elts_ci -
				for (i = 0; i < (int)n; ++i) {
					elt_idx = (elts_ci + i) & e_mask;
					elt = &(*rxq->elts)[elt_idx];
					*elt = rte_mbuf_raw_alloc(rxq->mp);
						for (i--; i >= 0; --i) {
				for (i = 0; i < (int)elts_n; ++i) {
					elt = &(*rxq->elts)[i];
						(uint16_t)((*elt)->buf_len -
						rte_pktmbuf_headroom(*elt));
				/* Padding with a fake mbuf for vec Rx. */
				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
					(*rxq->elts)[elts_n + i] =
			mlx5_rxq_initialize(rxq);
			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current one have
 * been processed.
 *   Pointer to RX queue.
 *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
 *   0 in case of empty CQE, otherwise the packet size in bytes.
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;

	/* Process compressed data in the CQE and mini arrays. */
		volatile struct mlx5_mini_cqe8 (*mc)[8] =
			(volatile struct mlx5_mini_cqe8 (*)[8])
			(uintptr_t)(&(*rxq->cqes)[zip->ca &
		len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt &
		*mcqe = &(*mc)[zip->ai & 7];
		if ((++zip->ai & 7) == 0) {
			/* Invalidate consumed CQEs */
				(*rxq->cqes)[idx & cqe_cnt].op_own =
			 * Increment consumer index to skip the number
			 * of CQEs consumed. Hardware leaves holes in
			 * the CQ ring for software use.
		if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
			/* Invalidate the rest */
				(*rxq->cqes)[idx & cqe_cnt].op_own =
			rxq->cq_ci = zip->cq_ci;
		 * No compressed data, get next CQE and verify if it is
		ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
				ret = mlx5_rx_err_handle(rxq, 0);
				if (ret == MLX5_CQE_STATUS_HW_OWN ||
		 * Introduce the local variable to have queue cq_ci
		 * index in queue structure always consistent with
		 * actual CQE boundary (not pointing to the middle
		 * of compressed CQE session).
		cq_ci = rxq->cq_ci + 1;
		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)
					[cq_ci & cqe_cnt].pkt_info);

			/* Fix endianness. */
			zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
			 * Current mini array position is the one
			 * returned by check_cqe64().
			 * If completion comprises several mini arrays,
			 * as a special case the second one is located
			 * 7 CQEs after the initial CQE instead of 8
			 * for subsequent ones.
			zip->na = zip->ca + 7;
			/* Compute the next non compressed CQE. */
			zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
			/* Get packet size to return. */
			len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
			/* Prefetch all to be invalidated */
				rte_prefetch0(&(*rxq->cqes)[(idx) &
			len = rte_be_to_cpu_32(cqe->byte_cnt);
	if (unlikely(rxq->err_state)) {
		cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
		++rxq->stats.idropped;
 * Translate RX completion flags to offload flags.
 *   Offload flags (ol_flags) for struct rte_mbuf.
static inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
	uint32_t ol_flags = 0;
	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);

			  MLX5_CQE_RX_L3_HDR_VALID,
			  PKT_RX_IP_CKSUM_GOOD) |
			  MLX5_CQE_RX_L4_HDR_VALID,
			  PKT_RX_L4_CKSUM_GOOD);
 * Fill in mbuf fields from RX completion flags.
 * Note that pkt->ol_flags should be initialized outside of this function.
 *   Pointer to RX queue.
 * @param mcqe
 *   Mini-CQE pointer, NULL if the completion is not compressed.
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe)
	/* Update packet information. */
	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);

		uint32_t rss_hash_res = 0;

		/* If compressed, take hash result from mini-CQE. */
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
			rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result);
			pkt->hash.rss = rss_hash_res;
			pkt->ol_flags |= PKT_RX_RSS_HASH;
		/* If compressed, take flow tag from mini-CQE. */
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
			mark = cqe->sop_drop_qpn;
			mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
				(mcqe->flow_tag_high << 16);
		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
			pkt->ol_flags |= PKT_RX_FDIR;
			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
				pkt->ol_flags |= PKT_RX_FDIR_ID;
				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
	if (rxq->dynf_meta) {
		uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata >>
			__builtin_popcount(rxq->flow_meta_port_mask)) &
			rxq->flow_meta_port_mask;

			pkt->ol_flags |= rxq->flow_meta_mask;
			*RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
	if (rxq->vlan_strip) {
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
			vlan_strip = cqe->hdr_type_etc &
				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
			vlan_strip = mcqe->hdr_type &
				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
			pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
			pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
	if (rxq->hw_timestamp) {
		uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);

		if (rxq->rt_timestamp)
			ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts);
		mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts);
		pkt->ol_flags |= rxq->timestamp_rx_flag;
 * DPDK callback for RX.
 *   Generic pointer to RX queue structure.
 *   Array to store received packets.
 *   Maximum number of packets in array.
 *   Number of packets successfully received (<= pkts_n).
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
	const unsigned int sges_n = rxq->sges_n;
	struct rte_mbuf *pkt = NULL;
	struct rte_mbuf *seg = NULL;
	volatile struct mlx5_cqe *cqe =
		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len = 0; /* keep its value across iterations. */

		unsigned int idx = rq_ci & wqe_cnt;
		volatile struct mlx5_wqe_data_seg *wqe =
			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
		struct rte_mbuf *rep = (*rxq->elts)[idx];
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;

		/* Allocate the buf from the same pool. */
		rep = rte_mbuf_raw_alloc(seg->pool);
		if (unlikely(rep == NULL)) {
			++rxq->stats.rx_nombuf;
				 * no buffers before we even started,
				MLX5_ASSERT(pkt != (*rxq->elts)[idx]);
				rte_mbuf_raw_free(pkt);
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
				rte_mbuf_raw_free(rep);
			MLX5_ASSERT(len >= (rxq->crc_present << 2));
			pkt->ol_flags &= EXT_ATTACHED_MBUF;
			rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
			if (rxq->crc_present)
				len -= RTE_ETHER_CRC_LEN;
			if (cqe->lro_num_seg > 1) {
					(rte_pktmbuf_mtod(pkt, uint8_t *), cqe,
				pkt->ol_flags |= PKT_RX_LRO;
				pkt->tso_segsz = len / cqe->lro_num_seg;
		DATA_LEN(rep) = DATA_LEN(seg);
		PKT_LEN(rep) = PKT_LEN(seg);
		SET_DATA_OFF(rep, DATA_OFF(seg));
		PORT(rep) = PORT(seg);
		(*rxq->elts)[idx] = rep;
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
		/* If there's only one MR, no need to replace LKey in WQE. */
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
		if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
		/* Align consumer index to the next stride. */
	if (unlikely(i == 0 && ((rq_ci >> sges_n) == rxq->rq_ci)))
	/* Update the consumer index. */
	rxq->rq_ci = rq_ci >> sges_n;
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
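
/*
 * Illustrative receive loop as seen from an application. The ethdev layer
 * dispatches rte_eth_rx_burst() to mlx5_rx_burst() or to one of the MPRQ /
 * vectorized variants selected for the port; port_id, queue_id and the
 * burst size are placeholders:
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t nb, j;
 *
 *	nb = rte_eth_rx_burst(port_id, queue_id, bufs, 32);
 *	for (j = 0; j < nb; j++) {
 *		// ... process bufs[j] ...
 *		rte_pktmbuf_free(bufs[j]);
 *	}
 */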
 * Update LRO packet TCP header.
 * The HW LRO feature doesn't update the TCP header after coalescing the
 * TCP segments but supplies the information in the CQE so SW can fill it in.
 *   Pointer to the TCP header.
 *   Pointer to the completion entry.
 *   The L3 pseudo-header checksum.
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type)
	 * The HW calculates only the TCP payload checksum, need to complete
	 * the TCP header checksum and the L3 pseudo-header checksum.
	uint32_t csum = phcsum + cqe->csum;

	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
		tcp->recv_ack = cqe->lro_ack_seq_num;
		tcp->rx_win = cqe->lro_tcp_win;
	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
	csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4);
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
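
/*
 * The two lines above fold the 32-bit one's-complement sum into 16 bits and
 * invert it; with csum = 0x0002FFFD, for instance, the fold gives
 * 0x0002 + 0xFFFD = 0xFFFF and the masked complement is 0x0000. The lines
 * that follow (not shown in this excerpt) store the final value in the TCP
 * header checksum field.
 */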
 * Update LRO packet headers.
 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
 * TCP segments but supplies the information in the CQE so SW can fill them in.
 *   The packet address.
 *   Pointer to the completion entry.
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len)
		struct rte_ether_hdr *eth;
		struct rte_vlan_hdr *vlan;
		struct rte_ipv4_hdr *ipv4;
		struct rte_ipv6_hdr *ipv6;
		struct rte_tcp_hdr *tcp;
	uint16_t proto = h.eth->ether_type;

	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
		proto = h.vlan->eth_proto;
	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
		h.ipv4->time_to_live = cqe->lro_min_ttl;
		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd));
		h.ipv4->hdr_checksum = 0;
		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
		h.ipv6->hop_limits = cqe->lro_min_ttl;
		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) -
		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
		l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
		l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
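
/*
 * Reference counting for Multi-Packet RQ buffers: buf->refcnt starts at 1
 * for the RQ's own reference, and every mbuf that uses the buffer as an
 * external data area takes another one. The callback below serves both as
 * the mbuf external-buffer free callback and as the body of
 * mlx5_mprq_buf_free(); whichever caller turns out to be the last user
 * returns the buffer to its mempool with the refcount left at 1 for reuse.
 */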
mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
	struct mlx5_mprq_buf *buf = opaque;

	if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
		rte_mempool_put(buf->mp, buf);
	} else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1,
					       __ATOMIC_RELAXED) == 0)) {
		__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
		rte_mempool_put(buf->mp, buf);

mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
	mlx5_mprq_buf_free_cb(NULL, buf);
 * DPDK callback for RX with Multi-Packet RQ support.
 *   Generic pointer to RX queue structure.
 *   Array to store received packets.
 *   Maximum number of packets in array.
 *   Number of packets successfully received (<= pkts_n).
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t strd_sz = 1 << rxq->strd_sz_n;
	const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
	const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
	uint32_t rq_ci = rxq->rq_ci;
	uint16_t consumed_strd = rxq->consumed_strd;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
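
	/*
	 * In Multi-Packet RQ mode a single WQE (one mprq_buf) holds strd_n
	 * strides of strd_sz bytes, and several packets may be carved out of
	 * the same buffer; consumed_strd tracks how many strides of the
	 * current buffer are already used so the WQE is recycled only once
	 * all of them have been consumed.
	 */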
	while (i < pkts_n) {
		struct rte_mbuf *pkt;
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
		enum mlx5_rqx_code rxq_code;

		if (consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wq_mask);
			/* Advance to the next WQE. */
			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		    rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
			strd_cnt = (len / strd_sz) + !!(len % strd_sz);
			strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
				   MLX5_MPRQ_STRIDE_NUM_SHIFT;
		MLX5_ASSERT(strd_cnt);
		consumed_strd += strd_cnt;
		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
		strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
		MLX5_ASSERT(strd_idx < strd_n);
		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
		pkt = rte_pktmbuf_alloc(rxq->mp);
		if (unlikely(pkt == NULL)) {
			++rxq->stats.rx_nombuf;
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
					   strd_idx, strd_cnt);
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(pkt);
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
		rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
		if (cqe->lro_num_seg > 1) {
			mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
					    cqe, mcqe, rxq, len);
			pkt->ol_flags |= PKT_RX_LRO;
			pkt->tso_segsz = len / cqe->lro_num_seg;
		PORT(pkt) = rxq->port_id;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
		/* Return packet. */
	/* Update the consumer indexes. */
	rxq->consumed_strd = consumed_strd;
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
 * Dummy DPDK callback for RX.
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *   Generic pointer to RX queue structure.
 *   Array to store received packets.
 *   Maximum number of packets in array.
 *   Number of packets successfully received (<= pkts_n).
removed_rx_burst(void *dpdk_rxq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
 * Vectorized Rx routines are not compiled in when the required vector
 * instructions are not supported on the target architecture.
 * The following null stubs are needed for linkage when the real
 * implementations (e.g. mlx5_rxtx_vec_sse.c for x86) are not compiled in.
mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
		  struct rte_mbuf **pkts __rte_unused,
		  uint16_t pkts_n __rte_unused)

mlx5_rx_burst_mprq_vec(void *dpdk_rxq __rte_unused,
		       struct rte_mbuf **pkts __rte_unused,
		       uint16_t pkts_n __rte_unused)

mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)

mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)