if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
!rxq_data->lro) {
cq_attr.cqe_comp_en = 1u;
- cq_attr.mini_cqe_res_format =
- mlx5_rxq_mprq_enabled(rxq_data) ?
- MLX5_CQE_RESP_FORMAT_CSUM_STRIDX :
- MLX5_CQE_RESP_FORMAT_HASH;
+ /*
+	 * Select the CSUM miniCQE format only for the non-vectorized
+	 * MPRQ Rx burst; use the HASH miniCQE format for everything else.
+ */
+ if (mlx5_rxq_check_vec_support(rxq_data) < 0 &&
+ mlx5_rxq_mprq_enabled(rxq_data))
+ cq_attr.mini_cqe_res_format =
+ MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
+ else
+ cq_attr.mini_cqe_res_format =
+ MLX5_CQE_RESP_FORMAT_HASH;
/*
* For vectorized Rx, it must not be doubled in order to
* make cq_ci and rq_ci aligned.
if (dev->rx_pkt_burst == mlx5_rx_burst ||
dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
- dev->rx_pkt_burst == mlx5_rx_burst_vec)
+ dev->rx_pkt_burst == mlx5_rx_burst_vec ||
+ dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec)
return ptypes;
return NULL;
}
MLX5_ASSERT(dev != NULL);
if (mlx5_check_vec_rx_support(dev) > 0) {
- rx_pkt_burst = mlx5_rx_burst_vec;
- DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
- dev->data->port_id);
+ if (mlx5_mprq_enabled(dev)) {
+ rx_pkt_burst = mlx5_rx_burst_mprq_vec;
+ DRV_LOG(DEBUG, "port %u selected vectorized"
+ " MPRQ Rx function", dev->data->port_id);
+ } else {
+ rx_pkt_burst = mlx5_rx_burst_vec;
+ DRV_LOG(DEBUG, "port %u selected vectorized"
+ " SPRQ Rx function", dev->data->port_id);
+ }
} else if (mlx5_mprq_enabled(dev)) {
rx_pkt_burst = mlx5_rx_burst_mprq;
+ DRV_LOG(DEBUG, "port %u selected MPRQ Rx function",
+ dev->data->port_id);
+ } else {
+ DRV_LOG(DEBUG, "port %u selected SPRQ Rx function",
+ dev->data->port_id);
}
return rx_pkt_burst;
}
rxq->mprq_repl = buf;
}
DRV_LOG(DEBUG,
- "port %u Rx queue %u allocated and configured %u segments",
+ "port %u MPRQ queue %u allocated and configured %u segments",
rxq->port_id, rxq->idx, wqe_n);
return 0;
error:
(*rxq->mprq_bufs)[i]);
(*rxq->mprq_bufs)[i] = NULL;
}
- DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
+ DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything",
rxq->port_id, rxq->idx);
rte_errno = err; /* Restore rte_errno. */
return -rte_errno;
rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
- unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
+ unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
+ (1 << rxq_ctrl->rxq.elts_n) * (1 << rxq_ctrl->rxq.strd_num_n) :
+ (1 << rxq_ctrl->rxq.elts_n);
unsigned int i;
int err;
(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
}
DRV_LOG(DEBUG,
- "port %u Rx queue %u allocated and configured %u segments"
+ "port %u SPRQ queue %u allocated and configured %u segments"
" (max %u packets)",
PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
elts_n / (1 << rxq_ctrl->rxq.sges_n));
rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
(*rxq_ctrl->rxq.elts)[i] = NULL;
}
- DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
+ DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything",
PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
rte_errno = err; /* Restore rte_errno. */
return -rte_errno;
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
- return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
- rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
+ int ret = 0;
+
+	/*
+ * For MPRQ we need to allocate both MPRQ buffers
+ * for WQEs and simple mbufs for vector processing.
+ */
+ if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
+ ret = rxq_alloc_elts_mprq(rxq_ctrl);
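+	/* A failure in either allocation makes the whole call return non-zero. */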
+ return (ret || rxq_alloc_elts_sprq(rxq_ctrl));
}
/**
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
uint16_t i;
- DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
- rxq->port_id, rxq->idx);
+	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %u WRs",
+ rxq->port_id, rxq->idx, (1u << rxq->elts_n));
if (rxq->mprq_bufs == NULL)
return;
- MLX5_ASSERT(mlx5_rxq_check_vec_support(rxq) < 0);
for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
if ((*rxq->mprq_bufs)[i] != NULL)
mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
- const uint16_t q_n = (1 << rxq->elts_n);
+ const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
+ (1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
+ (1 << rxq->elts_n);
const uint16_t q_mask = q_n - 1;
uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
uint16_t i;
- DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
- PORT_ID(rxq_ctrl->priv), rxq->idx);
+ DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs",
+ PORT_ID(rxq_ctrl->priv), rxq->idx, q_n);
if (rxq->elts == NULL)
return;
/**
- * Some mbuf in the Ring belongs to the application. They cannot be
- * freed.
+	 * Some mbufs in the ring belong to the application;
+	 * they cannot be freed.
*/
if (mlx5_rxq_check_vec_support(rxq) > 0) {
for (i = 0; i < used; ++i)
(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
rxq->rq_pi = rxq->rq_ci;
}
- for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
+ for (i = 0; i != q_n; ++i) {
if ((*rxq->elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq->elts)[i]);
(*rxq->elts)[i] = NULL;
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
+ /*
+	 * For MPRQ we need to free both the MPRQ buffers used
+	 * for WQEs and the simple mbufs used for vector processing.
+ */
if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
rxq_free_elts_mprq(rxq_ctrl);
- else
- rxq_free_elts_sprq(rxq_ctrl);
+ rxq_free_elts_sprq(rxq_ctrl);
}
/**
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
- unsigned int mprq_stride_nums;
- unsigned int mprq_stride_size;
- unsigned int mprq_stride_cap;
struct mlx5_dev_config *config = &priv->config;
- /*
- * Always allocate extra slots, even if eventually
- * the vector Rx will not be used.
- */
- uint16_t desc_n =
- desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
uint64_t offloads = conf->offloads |
dev->data->dev_conf.rxmode.offloads;
unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
- const int mprq_en = mlx5_check_mprq_support(dev) > 0;
unsigned int max_rx_pkt_len = lro_on_queue ?
dev->data->dev_conf.rxmode.max_lro_pkt_size :
dev->data->dev_conf.rxmode.max_rx_pkt_len;
RTE_PKTMBUF_HEADROOM;
unsigned int max_lro_size = 0;
unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
+ const int mprq_en = mlx5_check_mprq_support(dev) > 0;
+ unsigned int mprq_stride_nums = config->mprq.stride_num_n ?
+ config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
+ unsigned int mprq_stride_size = non_scatter_min_mbuf_size <=
+ (1U << config->mprq.max_stride_size_n) ?
+ log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+ unsigned int mprq_stride_cap = (config->mprq.stride_num_n ?
+ (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
+ (config->mprq.stride_size_n ?
+ (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
+ /*
+ * Always allocate extra slots, even if eventually
+ * the vector Rx will not be used.
+ */
+ uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
if (non_scatter_min_mbuf_size > mb_len && !(offloads &
DEV_RX_OFFLOAD_SCATTER)) {
rte_errno = ENOSPC;
return NULL;
}
- tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
- desc_n * sizeof(struct rte_mbuf *), 0, socket);
+ tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+ sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *) +
+ (desc >> mprq_stride_nums) * sizeof(struct mlx5_mprq_buf *),
+ 0, socket);
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
tmpl->socket = socket;
if (dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
- mprq_stride_nums = config->mprq.stride_num_n ?
- config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
- mprq_stride_size = non_scatter_min_mbuf_size <=
- (1U << config->mprq.max_stride_size_n) ?
- log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
- mprq_stride_cap = (config->mprq.stride_num_n ?
- (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
- (config->mprq.stride_size_n ?
- (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
/*
* This Rx queue can be configured as a Multi-Packet RQ if all of the
* following conditions are met:
tmpl->rxq.mp = mp;
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.rq_repl_thresh =
- MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
+ MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n);
tmpl->rxq.elts =
- (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+ (struct rte_mbuf *(*)[desc_n])(tmpl + 1);
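+	/* The MPRQ buffer ring follows the SW mbuf ring. */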
+ tmpl->rxq.mprq_bufs =
+ (struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n);
#ifndef RTE_ARCH_64
tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
#endif
#include <mlx5_prm.h>
#include <mlx5_common.h>
+#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
-#include "mlx5_autoconf.h"
/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);
-static __rte_always_inline void
-mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
- const unsigned int strd_n);
-
static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm);
struct rte_eth_burst_mode *mode)
{
eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq;
+
+	rxq = (*priv->rxqs)[rx_queue_id];
+ if (!rxq) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
if (pkt_burst == mlx5_rx_burst) {
snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
} else if (pkt_burst == mlx5_rx_burst_mprq) {
snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
#else
return -EINVAL;
+#endif
+ } else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
+#if defined RTE_ARCH_X86_64
+ snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
+#elif defined RTE_ARCH_ARM64
+ snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
+#elif defined RTE_ARCH_PPC_64
+ snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
+#else
+ return -EINVAL;
#endif
} else {
return -EINVAL;
rxq->zip = (struct rxq_zip){
.ai = 0,
};
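+	/*
+	 * In MPRQ mode the SW ring holds one mbuf per stride, so the
+	 * index starts at the number of pre-allocated elements.
+	 */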
+ rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
+ (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
/* Update doorbell counter. */
rxq->rq_ci = wqe_n >> rxq->sges_n;
rte_io_wmb();
{
const uint16_t cqe_n = 1 << rxq->cqe_n;
const uint16_t cqe_mask = cqe_n - 1;
- const unsigned int wqe_n = 1 << rxq->elts_n;
+ const uint16_t wqe_n = 1 << rxq->elts_n;
+ const uint16_t strd_n = 1 << rxq->strd_num_n;
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
union {
&sm))
return -1;
if (vec) {
- const uint16_t q_mask = wqe_n - 1;
- uint16_t elt_idx;
+ const uint32_t elts_n =
+ mlx5_rxq_mprq_enabled(rxq) ?
+ wqe_n * strd_n : wqe_n;
+ const uint32_t e_mask = elts_n - 1;
+ uint32_t elts_ci =
+ mlx5_rxq_mprq_enabled(rxq) ?
+ rxq->elts_ci : rxq->rq_ci;
+ uint32_t elt_idx;
struct rte_mbuf **elt;
int i;
- unsigned int n = wqe_n - (rxq->rq_ci -
+ unsigned int n = elts_n - (elts_ci -
rxq->rq_pi);
for (i = 0; i < (int)n; ++i) {
- elt_idx = (rxq->rq_ci + i) & q_mask;
+ elt_idx = (elts_ci + i) & e_mask;
elt = &(*rxq->elts)[elt_idx];
*elt = rte_mbuf_raw_alloc(rxq->mp);
if (!*elt) {
for (i--; i >= 0; --i) {
- elt_idx = (rxq->rq_ci +
- i) & q_mask;
+							elt_idx = (elts_ci +
+								   i) & e_mask;
elt = &(*rxq->elts)
[elt_idx];
rte_pktmbuf_free_seg
return -1;
}
}
- for (i = 0; i < (int)wqe_n; ++i) {
+ for (i = 0; i < (int)elts_n; ++i) {
elt = &(*rxq->elts)[i];
DATA_LEN(*elt) =
(uint16_t)((*elt)->buf_len -
}
/* Padding with a fake mbuf for vec Rx. */
for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
- (*rxq->elts)[wqe_n + i] =
+ (*rxq->elts)[elts_n + i] =
&rxq->fake_mbuf;
}
mlx5_rxq_initialize(rxq);
mlx5_mprq_buf_free_cb(NULL, buf);
}
-static inline void
-mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
- const unsigned int strd_n)
-{
- struct mlx5_mprq_buf *rep = rxq->mprq_repl;
- volatile struct mlx5_wqe_data_seg *wqe =
- &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
- void *addr;
-
- MLX5_ASSERT(rep != NULL);
- /* Replace MPRQ buf. */
- (*rxq->mprq_bufs)[rq_idx] = rep;
- /* Replace WQE. */
- addr = mlx5_mprq_buf_addr(rep, strd_n);
- wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
- /* If there's only one MR, no need to replace LKey in WQE. */
- if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
- wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
- /* Stash a mbuf for next replacement. */
- if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
- rxq->mprq_repl = rep;
- else
- rxq->mprq_repl = NULL;
-}
-
/**
* DPDK callback for RX with Multi-Packet RQ support.
*
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct mlx5_rxq_data *rxq = dpdk_rxq;
- const unsigned int strd_n = 1 << rxq->strd_num_n;
- const unsigned int strd_sz = 1 << rxq->strd_sz_n;
- const unsigned int strd_shift =
- MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
- const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
- const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
+ const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
unsigned int i = 0;
uint32_t rq_ci = rxq->rq_ci;
while (i < pkts_n) {
struct rte_mbuf *pkt;
- void *addr;
int ret;
uint32_t len;
uint16_t strd_cnt;
uint16_t strd_idx;
- uint32_t offset;
uint32_t byte_cnt;
- int32_t hdrm_overlap;
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
uint32_t rss_hash_res = 0;
+ enum mlx5_rqx_code rxq_code;
if (consumed_strd == strd_n) {
- /* Replace WQE only if the buffer is still in use. */
- if (__atomic_load_n(&buf->refcnt,
- __ATOMIC_RELAXED) > 1) {
- mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n);
- /* Release the old buffer. */
- mlx5_mprq_buf_free(buf);
- } else if (unlikely(rxq->mprq_repl == NULL)) {
- struct mlx5_mprq_buf *rep;
-
- /*
- * Currently, the MPRQ mempool is out of buffer
- * and doing memcpy regardless of the size of Rx
- * packet. Retry allocation to get back to
- * normal.
- */
- if (!rte_mempool_get(rxq->mprq_mp,
- (void **)&rep))
- rxq->mprq_repl = rep;
- }
+ /* Replace WQE if the buffer is still in use. */
+ mprq_buf_replace(rxq, rq_ci & wq_mask);
/* Advance to the next WQE. */
consumed_strd = 0;
++rq_ci;
MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
if (rxq->crc_present)
len -= RTE_ETHER_CRC_LEN;
- offset = strd_idx * strd_sz + strd_shift;
- addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
- hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
- /*
- * Memcpy packets to the target mbuf if:
- * - The size of packet is smaller than mprq_max_memcpy_len.
- * - Out of buffer in the Mempool for Multi-Packet RQ.
- * - The packet's stride overlaps a headroom and scatter is off.
- */
- if (len <= rxq->mprq_max_memcpy_len ||
- rxq->mprq_repl == NULL ||
- (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
- if (likely(rte_pktmbuf_tailroom(pkt) >= len)) {
- rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
- addr, len);
- DATA_LEN(pkt) = len;
- } else if (rxq->strd_scatter_en) {
- struct rte_mbuf *prev = pkt;
- uint32_t seg_len =
- RTE_MIN(rte_pktmbuf_tailroom(pkt), len);
- uint32_t rem_len = len - seg_len;
-
- rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
- addr, seg_len);
- DATA_LEN(pkt) = seg_len;
- while (rem_len) {
- struct rte_mbuf *next =
- rte_pktmbuf_alloc(rxq->mp);
-
- if (unlikely(next == NULL)) {
- rte_pktmbuf_free(pkt);
- ++rxq->stats.rx_nombuf;
- goto out;
- }
- NEXT(prev) = next;
- SET_DATA_OFF(next, 0);
- addr = RTE_PTR_ADD(addr, seg_len);
- seg_len = RTE_MIN
- (rte_pktmbuf_tailroom(next),
- rem_len);
- rte_memcpy
- (rte_pktmbuf_mtod(next, void *),
- addr, seg_len);
- DATA_LEN(next) = seg_len;
- rem_len -= seg_len;
- prev = next;
- ++NB_SEGS(pkt);
- }
- } else {
- rte_pktmbuf_free_seg(pkt);
+ rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
+ strd_idx, strd_cnt);
+ if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
+ rte_pktmbuf_free_seg(pkt);
+ if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
++rxq->stats.idropped;
continue;
}
- } else {
- rte_iova_t buf_iova;
- struct rte_mbuf_ext_shared_info *shinfo;
- uint16_t buf_len = strd_cnt * strd_sz;
- void *buf_addr;
-
- /* Increment the refcnt of the whole chunk. */
- __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
- MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
- __ATOMIC_RELAXED) <= strd_n + 1);
- buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
- /*
- * MLX5 device doesn't use iova but it is necessary in a
- * case where the Rx packet is transmitted via a
- * different PMD.
- */
- buf_iova = rte_mempool_virt2iova(buf) +
- RTE_PTR_DIFF(buf_addr, buf);
- shinfo = &buf->shinfos[strd_idx];
- rte_mbuf_ext_refcnt_set(shinfo, 1);
- /*
- * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
- * attaching the stride to mbuf and more offload flags
- * will be added below by calling rxq_cq_to_mbuf().
- * Other fields will be overwritten.
- */
- rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
- buf_len, shinfo);
- /* Set mbuf head-room. */
- SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
- MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
- MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
- len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
- DATA_LEN(pkt) = len;
- /*
- * Copy the last fragment of a packet (up to headroom
- * size bytes) in case there is a stride overlap with
- * a next packet's headroom. Allocate a separate mbuf
- * to store this fragment and link it. Scatter is on.
- */
- if (hdrm_overlap > 0) {
- MLX5_ASSERT(rxq->strd_scatter_en);
- struct rte_mbuf *seg =
- rte_pktmbuf_alloc(rxq->mp);
-
- if (unlikely(seg == NULL)) {
- rte_pktmbuf_free_seg(pkt);
- ++rxq->stats.rx_nombuf;
- break;
- }
- SET_DATA_OFF(seg, 0);
- rte_memcpy(rte_pktmbuf_mtod(seg, void *),
- RTE_PTR_ADD(addr, len - hdrm_overlap),
- hdrm_overlap);
- DATA_LEN(seg) = hdrm_overlap;
- DATA_LEN(pkt) = len - hdrm_overlap;
- NEXT(pkt) = seg;
- NB_SEGS(pkt) = 2;
+ if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
+ ++rxq->stats.rx_nombuf;
+ break;
}
}
rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
if (cqe->lro_num_seg > 1) {
- mlx5_lro_update_hdr(addr, cqe, len);
+ mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
+ cqe, len);
pkt->ol_flags |= PKT_RX_LRO;
pkt->tso_segsz = len / cqe->lro_num_seg;
}
*(pkts++) = pkt;
++i;
}
-out:
/* Update the consumer indexes. */
rxq->consumed_strd = consumed_strd;
rte_io_wmb();
return 0;
}
+__rte_weak uint16_t
+mlx5_rx_burst_mprq_vec(void *dpdk_rxq __rte_unused,
+ struct rte_mbuf **pkts __rte_unused,
+ uint16_t pkts_n __rte_unused)
+{
+ return 0;
+}
+
__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_autoconf.h"
+#include "mlx5_mr.h"
/* Support tunnel matching. */
#define MLX5_FLOW_TUNNEL 10
MLX5_RXQ_ERR_STATE_NEED_READY,
};
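+/* Return codes of the MPRQ buffer-to-packet conversion (mprq_buf_to_pkt). */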
+enum mlx5_rqx_code {
+ MLX5_RXQ_CODE_EXIT = 0,
+ MLX5_RXQ_CODE_NOMBUF,
+ MLX5_RXQ_CODE_DROPPED,
+};
+
/* RX queue descriptor. */
struct mlx5_rxq_data {
unsigned int csum:1; /* Enable checksum offloading. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
+	uint32_t elts_ci; /* SW ring index for Multi-Packet RQ. */
uint32_t rq_ci;
uint16_t consumed_strd; /* Number of consumed strides in WQE. */
uint32_t rq_pi;
uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
volatile void *wqes;
volatile struct mlx5_cqe(*cqes)[];
- RTE_STD_C11
- union {
- struct rte_mbuf *(*elts)[];
- struct mlx5_mprq_buf *(*mprq_bufs)[];
- };
+	struct rte_mbuf *(*elts)[]; /* SW ring of mbufs. */
+	struct mlx5_mprq_buf *(*mprq_bufs)[]; /* MPRQ buffers SW ring. */
struct rte_mempool *mp;
struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
+uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
/* mlx5_mr.c */
*RTE_MBUF_DYNFIELD(mbuf, offset, rte_mbuf_timestamp_t *) = timestamp;
}
+/**
+ * Replace MPRQ buffer.
+ *
+ * @param rxq
+ * Pointer to Rx queue structure.
+ * @param rq_idx
+ * RQ index to replace.
+ */
+static __rte_always_inline void
+mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
+{
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ struct mlx5_mprq_buf *rep = rxq->mprq_repl;
+ volatile struct mlx5_wqe_data_seg *wqe =
+ &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
+ struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_idx];
+ void *addr;
+
+ if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) > 1) {
+ MLX5_ASSERT(rep != NULL);
+ /* Replace MPRQ buf. */
+ (*rxq->mprq_bufs)[rq_idx] = rep;
+ /* Replace WQE. */
+ addr = mlx5_mprq_buf_addr(rep, strd_n);
+ wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
+ /* If there's only one MR, no need to replace LKey in WQE. */
+ if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
+ wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
+ /* Stash a mbuf for next replacement. */
+ if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
+ rxq->mprq_repl = rep;
+ else
+ rxq->mprq_repl = NULL;
+ /* Release the old buffer. */
+ mlx5_mprq_buf_free(buf);
+ } else if (unlikely(rxq->mprq_repl == NULL)) {
+ struct mlx5_mprq_buf *rep;
+
+ /*
+		 * The MPRQ mempool is currently out of buffers,
+		 * so every Rx packet is copied regardless of its
+		 * size. Retry the allocation to get back to
+		 * normal operation.
+ */
+ if (!rte_mempool_get(rxq->mprq_mp, (void **)&rep))
+ rxq->mprq_repl = rep;
+ }
+}
+
+/**
+ * Attach or copy MPRQ buffer content to a packet.
+ *
+ * @param rxq
+ * Pointer to Rx queue structure.
+ * @param pkt
+ * Pointer to a packet to fill.
+ * @param len
+ * Packet length.
+ * @param buf
+ * Pointer to a MPRQ buffer to take the data from.
+ * @param strd_idx
+ * Stride index to start from.
+ * @param strd_cnt
+ * Number of strides to consume.
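+ *
+ * @return
+ *   MLX5_RXQ_CODE_EXIT on success, MLX5_RXQ_CODE_NOMBUF if an mbuf
+ *   allocation fails, MLX5_RXQ_CODE_DROPPED if the packet must be dropped.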
+ */
+static __rte_always_inline enum mlx5_rqx_code
+mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, uint32_t len,
+ struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t strd_cnt)
+{
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint16_t strd_sz = 1 << rxq->strd_sz_n;
+ const uint16_t strd_shift =
+ MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
+ const int32_t hdrm_overlap =
+ len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
+ const uint32_t offset = strd_idx * strd_sz + strd_shift;
+ void *addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+
+ /*
+	 * Copy the packet data to the target mbuf if:
+	 * - the packet size does not exceed mprq_max_memcpy_len, or
+	 * - the Multi-Packet RQ mempool is out of buffers, or
+	 * - the strides overlap the next headroom and scatter is off.
+ */
+ if (len <= rxq->mprq_max_memcpy_len ||
+ rxq->mprq_repl == NULL ||
+ (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
+ if (likely(len <=
+ (uint32_t)(pkt->buf_len - RTE_PKTMBUF_HEADROOM))) {
+ rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
+ addr, len);
+ DATA_LEN(pkt) = len;
+ } else if (rxq->strd_scatter_en) {
+ struct rte_mbuf *prev = pkt;
+ uint32_t seg_len = RTE_MIN(len, (uint32_t)
+ (pkt->buf_len - RTE_PKTMBUF_HEADROOM));
+ uint32_t rem_len = len - seg_len;
+
+ rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
+ addr, seg_len);
+ DATA_LEN(pkt) = seg_len;
+ while (rem_len) {
+ struct rte_mbuf *next =
+ rte_pktmbuf_alloc(rxq->mp);
+
+ if (unlikely(next == NULL))
+ return MLX5_RXQ_CODE_NOMBUF;
+ NEXT(prev) = next;
+ SET_DATA_OFF(next, 0);
+ addr = RTE_PTR_ADD(addr, seg_len);
+ seg_len = RTE_MIN(rem_len, (uint32_t)
+ (next->buf_len - RTE_PKTMBUF_HEADROOM));
+ rte_memcpy
+ (rte_pktmbuf_mtod(next, void *),
+ addr, seg_len);
+ DATA_LEN(next) = seg_len;
+ rem_len -= seg_len;
+ prev = next;
+ ++NB_SEGS(pkt);
+ }
+ } else {
+ return MLX5_RXQ_CODE_DROPPED;
+ }
+ } else {
+ rte_iova_t buf_iova;
+ struct rte_mbuf_ext_shared_info *shinfo;
+ uint16_t buf_len = strd_cnt * strd_sz;
+ void *buf_addr;
+
+ /* Increment the refcnt of the whole chunk. */
+ __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
+ MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
+ __ATOMIC_RELAXED) <= strd_n + 1);
+ buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
+ /*
+		 * The MLX5 device doesn't use the iova itself, but it
+		 * is needed when the Rx packet is transmitted via a
+		 * different PMD.
+ */
+ buf_iova = rte_mempool_virt2iova(buf) +
+ RTE_PTR_DIFF(buf_addr, buf);
+ shinfo = &buf->shinfos[strd_idx];
+ rte_mbuf_ext_refcnt_set(shinfo, 1);
+ /*
+		 * EXT_ATTACHED_MBUF will be set in pkt->ol_flags when
+		 * attaching the stride to the mbuf; more offload flags
+		 * will be added later by rxq_cq_to_mbuf().
+		 * Other fields will be overwritten.
+ */
+ rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
+ buf_len, shinfo);
+ /* Set mbuf head-room. */
+ SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
+ MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
+ MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
+ len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
+ DATA_LEN(pkt) = len;
+ /*
+ * Copy the last fragment of a packet (up to headroom
+ * size bytes) in case there is a stride overlap with
+ * a next packet's headroom. Allocate a separate mbuf
+ * to store this fragment and link it. Scatter is on.
+ */
+ if (hdrm_overlap > 0) {
+ MLX5_ASSERT(rxq->strd_scatter_en);
+ struct rte_mbuf *seg =
+ rte_pktmbuf_alloc(rxq->mp);
+
+ if (unlikely(seg == NULL))
+ return MLX5_RXQ_CODE_NOMBUF;
+ SET_DATA_OFF(seg, 0);
+ rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+ RTE_PTR_ADD(addr, len - hdrm_overlap),
+ hdrm_overlap);
+ DATA_LEN(seg) = hdrm_overlap;
+ DATA_LEN(pkt) = len - hdrm_overlap;
+ NEXT(pkt) = seg;
+ NB_SEGS(pkt) = 2;
+ }
+ }
+ return MLX5_RXQ_CODE_EXIT;
+}
+
#endif /* RTE_PMD_MLX5_RXTX_H_ */
return n;
}
+/**
+ * Replenish buffers for RX in bulk.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ */
+static inline void
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
+{
+ const uint16_t q_n = 1 << rxq->elts_n;
+ const uint16_t q_mask = q_n - 1;
+ uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
+ uint16_t elts_idx = rxq->rq_ci & q_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ volatile struct mlx5_wqe_data_seg *wq =
+ &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
+ unsigned int i;
+
+ if (n >= rxq->rq_repl_thresh) {
+ MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
+ MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
+ MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ for (i = 0; i < n; ++i) {
+ void *buf_addr;
+
+ /*
+ * In order to support the mbufs with external attached
+ * data buffer we should use the buf_addr pointer
+ * instead of rte_mbuf_buf_addr(). It touches the mbuf
+ * itself and may impact the performance.
+ */
+ buf_addr = elts[i]->buf_addr;
+ wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+ RTE_PKTMBUF_HEADROOM);
+ /* If there's a single MR, no need to replace LKey. */
+ if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
+ > 1))
+ wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+ }
+ rxq->rq_ci += n;
+ /* Prevent overflowing into consumed mbufs. */
+ elts_idx = rxq->rq_ci & q_mask;
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ }
+}
+
+/**
+ * Replenish buffers for MPRQ RX in bulk.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ */
+static inline void
+mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
+{
+ const uint16_t wqe_n = 1 << rxq->elts_n;
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t elts_n = wqe_n * strd_n;
+ const uint32_t wqe_mask = elts_n - 1;
+ uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
+ uint32_t elts_idx = rxq->elts_ci & wqe_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+
+	if (n >= rxq->rq_repl_thresh) {
+		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
+		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
+			     MLX5_VPMD_DESCS_PER_LOOP);
+		/* Not to cross queue end. */
+		n = RTE_MIN(n, elts_n - elts_idx);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ rxq->elts_ci += n;
+ }
+}
+
+/**
+ * Copy or attach MPRQ buffers to RX SW ring.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be stored.
+ * @param pkts_n
+ * Number of packets to be stored.
+ *
+ * @return
+ * Number of packets successfully copied/attached (<= pkts_n).
+ */
+static inline uint16_t
+rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
+ struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ const uint16_t wqe_n = 1 << rxq->elts_n;
+ const uint16_t wqe_mask = wqe_n - 1;
+ const uint16_t strd_sz = 1 << rxq->strd_sz_n;
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t elts_n = wqe_n * strd_n;
+ const uint32_t elts_mask = elts_n - 1;
+ uint32_t elts_idx = rxq->rq_pi & elts_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ uint32_t rq_ci = rxq->rq_ci;
+ struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+ uint16_t copied = 0;
+ uint16_t i = 0;
+
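+	/*
+	 * The vector CQE processing stored the packet length in each SW
+	 * ring mbuf; attach or copy the MPRQ buffer data accordingly.
+	 */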
+ for (i = 0; i < pkts_n; ++i) {
+ uint16_t strd_cnt;
+ enum mlx5_rqx_code rxq_code;
+
+ if (rxq->consumed_strd == strd_n) {
+ /* Replace WQE if the buffer is still in use. */
+ mprq_buf_replace(rxq, rq_ci & wqe_mask);
+ /* Advance to the next WQE. */
+ rxq->consumed_strd = 0;
+ rq_ci++;
+ buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+ }
+
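+		/*
+		 * A zero packet length means the CQE carries no data
+		 * (e.g. an MPRQ filler CQE): drop the mbuf and skip
+		 * the rest of the strides in the current WQE.
+		 */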
+ if (!elts[i]->pkt_len) {
+ rxq->consumed_strd = strd_n;
+ rte_pktmbuf_free_seg(elts[i]);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets -= 1;
+#endif
+ continue;
+ }
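+		/* Number of strides occupied by this packet, rounded up. */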
+ strd_cnt = (elts[i]->pkt_len / strd_sz) +
+ ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
+ rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
+ buf, rxq->consumed_strd, strd_cnt);
+ rxq->consumed_strd += strd_cnt;
+ if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
+ rte_pktmbuf_free_seg(elts[i]);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets -= 1;
+ rxq->stats.ibytes -= elts[i]->pkt_len;
+#endif
+ if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
+ ++rxq->stats.rx_nombuf;
+ break;
+ }
+ if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
+ ++rxq->stats.idropped;
+ continue;
+ }
+ }
+ pkts[copied++] = elts[i];
+ }
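+	/* Update the consumer indexes and the doorbell records. */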
+ rxq->rq_pi += i;
+ rxq->cq_ci += i;
+ rte_io_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ if (rq_ci != rxq->rq_ci) {
+ rxq->rq_ci = rq_ci;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ }
+ return copied;
+}
+
/**
* Receive burst of packets. An errored completion also consumes a mbuf, but the
* packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
bool no_cq = false;
do {
- nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq);
+ nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
+ &err, &no_cq);
+ if (unlikely(err | rxq->err_state))
+ nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
+ tn += nb_rx;
+ if (unlikely(no_cq))
+ break;
+ } while (tn != pkts_n);
+ return tn;
+}
+
+/**
+ * Receive burst of packets. An errored completion also consumes a mbuf, but the
+ * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
+ * before returning to application.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ * @param[out] err
+ * Pointer to a flag. Set non-zero value if pkts array has at least one error
+ * packet to handle.
+ * @param[out] no_cq
+ * Pointer to a boolean. Set true if no new CQE seen.
+ *
+ * @return
+ * Number of packets received including errors (<= pkts_n).
+ */
+static inline uint16_t
+rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint64_t *err, bool *no_cq)
+{
+ const uint16_t q_n = 1 << rxq->cqe_n;
+ const uint16_t q_mask = q_n - 1;
+ const uint16_t wqe_n = 1 << rxq->elts_n;
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t elts_n = wqe_n * strd_n;
+ const uint32_t elts_mask = elts_n - 1;
+ volatile struct mlx5_cqe *cq;
+ struct rte_mbuf **elts;
+ uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+ uint16_t nocmp_n = 0;
+ uint16_t rcvd_pkt = 0;
+ uint16_t cp_pkt = 0;
+ unsigned int cq_idx = rxq->cq_ci & q_mask;
+ unsigned int elts_idx;
+
+ MLX5_ASSERT(rxq->sges_n == 0);
+ cq = &(*rxq->cqes)[cq_idx];
+ rte_prefetch0(cq);
+ rte_prefetch0(cq + 1);
+ rte_prefetch0(cq + 2);
+ rte_prefetch0(cq + 3);
+ pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+ mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
+ /* See if there're unreturned mbufs from compressed CQE. */
+ rcvd_pkt = rxq->decompressed;
+ if (rcvd_pkt > 0) {
+ rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+ cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
+ rxq->decompressed -= rcvd_pkt;
+ pkts += cp_pkt;
+ }
+ elts_idx = rxq->rq_pi & elts_mask;
+ elts = &(*rxq->elts)[elts_idx];
+ /* Not to overflow pkts array. */
+ pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
+ pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
+ /* Not to move past the allocated mbufs. */
+ pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
+ if (!pkts_n) {
+ *no_cq = !cp_pkt;
+ return cp_pkt;
+ }
+ /* At this point, there shouldn't be any remaining packets. */
+ MLX5_ASSERT(rxq->decompressed == 0);
+ /* Process all the CQEs */
+ nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
+ /* If no new CQE seen, return without updating cq_db. */
+ if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+ *no_cq = true;
+ return cp_pkt;
+ }
+ /* Update the consumer indexes for non-compressed CQEs. */
+ MLX5_ASSERT(nocmp_n <= pkts_n);
+ cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
+ rcvd_pkt += cp_pkt;
+ /* Decompress the last CQE if compressed. */
+ if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
+ MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+ rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
+ &elts[nocmp_n]);
+ /* Return more packets if needed. */
+ if (nocmp_n < pkts_n) {
+ uint16_t n = rxq->decompressed;
+
+ n = RTE_MIN(n, pkts_n - nocmp_n);
+ cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
+ rcvd_pkt += cp_pkt;
+ rxq->decompressed -= n;
+ }
+ }
+ *no_cq = !rcvd_pkt;
+ return rcvd_pkt;
+}
+
+/**
+ * DPDK callback for vectorized MPRQ RX.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_rxq_data *rxq = dpdk_rxq;
+ uint16_t nb_rx = 0;
+ uint16_t tn = 0;
+ uint64_t err = 0;
+ bool no_cq = false;
+
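+	/* Keep polling until the burst is filled or no new CQEs are seen. */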
+ do {
+ nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
+ &err, &no_cq);
if (unlikely(err | rxq->err_state))
nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
tn += nb_rx;
struct mlx5_rxq_ctrl *ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
- if (mlx5_mprq_enabled(ETH_DEV(ctrl->priv)))
- return -ENOTSUP;
if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
return -ENOTSUP;
if (rxq->lro)
return -ENOTSUP;
if (!priv->config.rx_vec_en)
return -ENOTSUP;
- if (mlx5_mprq_enabled(dev))
- return -ENOTSUP;
/* All the configured queues should support. */
for (i = 0; i < priv->rxqs_n; ++i) {
struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
#include <mlx5_prm.h>
#include "mlx5_autoconf.h"
-
#include "mlx5_mr.h"
/* HW checksum offload capabilities of vectorized Tx. */
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==
offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);
-/**
- * Replenish buffers for RX in bulk.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static inline void
-mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
-{
- const uint16_t q_n = 1 << rxq->elts_n;
- const uint16_t q_mask = q_n - 1;
- uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
- uint16_t elts_idx = rxq->rq_ci & q_mask;
- struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
- volatile struct mlx5_wqe_data_seg *wq =
- &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
- unsigned int i;
-
- if (n >= rxq->rq_repl_thresh) {
- MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
- MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
- MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
- if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
- rxq->stats.rx_nombuf += n;
- return;
- }
- for (i = 0; i < n; ++i) {
- void *buf_addr;
-
- /*
- * In order to support the mbufs with external attached
- * data buffer we should use the buf_addr pointer
- * instead of rte_mbuf_buf_addr(). It touches the mbuf
- * itself and may impact the performance.
- */
- buf_addr = elts[i]->buf_addr;
- wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
- RTE_PKTMBUF_HEADROOM);
- /* If there's a single MR, no need to replace LKey. */
- if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
- > 1))
- wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
- }
- rxq->rq_ci += n;
- /* Prevent overflowing into consumed mbufs. */
- elts_idx = rxq->rq_ci & q_mask;
- for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
- (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
- rte_io_wmb();
- *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
- }
-}
-
#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */