net/mlx5: implement vectorized MPRQ burst
authorAlexander Kozyrev <akozyrev@nvidia.com>
Wed, 21 Oct 2020 20:30:30 +0000 (20:30 +0000)
committerFerruh Yigit <ferruh.yigit@intel.com>
Tue, 3 Nov 2020 22:24:25 +0000 (23:24 +0100)
MPRQ (Multi-Packet Rx Queue) processes one packet at a time using
simple scalar instructions. MPRQ works by posting a single large buffer
(consisting of multiple fixed-size strides) in order to receive
multiple packets at once into this buffer. An Rx packet is then either
copied to a user-provided mbuf, or the PMD attaches the Rx packet to
the mbuf via a pointer to an external buffer.

There is an opportunity to speed up packet reception by processing
4 packets simultaneously using SIMD (single instruction, multiple data)
extensions. Allocate mbufs in batches for every MPRQ buffer and process
the packets in groups of 4 until all the strides are exhausted. Then
switch to another MPRQ buffer and repeat the process all over again.
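
As an illustration only (this sketch is not part of the patch), the
control flow of the new routine looks roughly like the following, where
replenish_mbufs_bulk(), advance_to_next_mprq_buf() and
process_4_rx_descs() are hypothetical placeholder names:

    /* Hypothetical sketch of the vectorized MPRQ receive loop. */
    uint16_t
    mprq_burst_vec_sketch(struct mlx5_rxq_data *rxq,
                          struct rte_mbuf **pkts, uint16_t pkts_n)
    {
            const uint32_t strd_n = 1 << rxq->strd_num_n;
            uint16_t done = 0;
            uint16_t n;

            /* Bulk-allocate mbufs covering the strides of MPRQ buffers. */
            replenish_mbufs_bulk(rxq);
            while (done < pkts_n) {
                    if (rxq->consumed_strd == strd_n) {
                            /* Strides exhausted, go to the next buffer. */
                            advance_to_next_mprq_buf(rxq);
                            rxq->consumed_strd = 0;
                    }
                    /* SIMD path: handle 4 descriptors per iteration. */
                    n = process_4_rx_descs(rxq, &pkts[done]);
                    if (!n)
                            break; /* No more completed descriptors. */
                    done += n;
            }
            return done;
    }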

The vectorized MPRQ burst routine is engaged automatically when the
mprq_en=1 devarg is specified and vectorization is not disabled
explicitly with the rx_vec_en=0 devarg. There is one limitation: LRO is
not supported, and the scalar MPRQ burst is selected when LRO is
enabled.
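
For example, assuming a ConnectX NIC at PCI address 0000:03:00.0
(adjust to the actual device), both devargs can be passed to testpmd to
exercise the vectorized MPRQ path:

    dpdk-testpmd -w 0000:03:00.0,mprq_en=1,rx_vec_en=1 -- --rxq=2 --txq=2

(-w may be spelled -a on releases where that EAL option was renamed.)
The DRV_LOG messages added to mlx5_select_rx_function() below report in
the debug log which Rx burst routine was actually selected.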

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
drivers/net/mlx5/mlx5_devx.c
drivers/net/mlx5/mlx5_ethdev.c
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h
drivers/net/mlx5/mlx5_rxtx_vec.c
drivers/net/mlx5/mlx5_rxtx_vec.h

index 11bda32..0c99fe7 100644 (file)
@@ -437,10 +437,17 @@ mlx5_rxq_create_devx_cq_resources(struct rte_eth_dev *dev, uint16_t idx)
        if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
            !rxq_data->lro) {
                cq_attr.cqe_comp_en = 1u;
-               cq_attr.mini_cqe_res_format =
-                               mlx5_rxq_mprq_enabled(rxq_data) ?
-                                       MLX5_CQE_RESP_FORMAT_CSUM_STRIDX :
-                                       MLX5_CQE_RESP_FORMAT_HASH;
+               /*
+                * Select CSUM miniCQE format only for non-vectorized MPRQ
+                * Rx burst; use HASH miniCQE format for everything else.
+                */
+               if (mlx5_rxq_check_vec_support(rxq_data) < 0 &&
+                       mlx5_rxq_mprq_enabled(rxq_data))
+                       cq_attr.mini_cqe_res_format =
+                               MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
+               else
+                       cq_attr.mini_cqe_res_format =
+                               MLX5_CQE_RESP_FORMAT_HASH;
                /*
                 * For vectorized Rx, it must not be doubled in order to
                 * make cq_ci and rq_ci aligned.
index 76ef026..623f969 100644 (file)
@@ -427,7 +427,8 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 
        if (dev->rx_pkt_burst == mlx5_rx_burst ||
            dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
-           dev->rx_pkt_burst == mlx5_rx_burst_vec)
+           dev->rx_pkt_burst == mlx5_rx_burst_vec ||
+           dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec)
                return ptypes;
        return NULL;
 }
@@ -486,11 +487,22 @@ mlx5_select_rx_function(struct rte_eth_dev *dev)
 
        MLX5_ASSERT(dev != NULL);
        if (mlx5_check_vec_rx_support(dev) > 0) {
-               rx_pkt_burst = mlx5_rx_burst_vec;
-               DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
-                       dev->data->port_id);
+               if (mlx5_mprq_enabled(dev)) {
+                       rx_pkt_burst = mlx5_rx_burst_mprq_vec;
+                       DRV_LOG(DEBUG, "port %u selected vectorized"
+                               " MPRQ Rx function", dev->data->port_id);
+               } else {
+                       rx_pkt_burst = mlx5_rx_burst_vec;
+                       DRV_LOG(DEBUG, "port %u selected vectorized"
+                               " SPRQ Rx function", dev->data->port_id);
+               }
        } else if (mlx5_mprq_enabled(dev)) {
                rx_pkt_burst = mlx5_rx_burst_mprq;
+               DRV_LOG(DEBUG, "port %u selected MPRQ Rx function",
+                       dev->data->port_id);
+       } else {
+               DRV_LOG(DEBUG, "port %u selected SPRQ Rx function",
+                       dev->data->port_id);
        }
        return rx_pkt_burst;
 }
index e2cf155..668caf8 100644 (file)
@@ -173,7 +173,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
                        rxq->mprq_repl = buf;
        }
        DRV_LOG(DEBUG,
-               "port %u Rx queue %u allocated and configured %u segments",
+               "port %u MPRQ queue %u allocated and configured %u segments",
                rxq->port_id, rxq->idx, wqe_n);
        return 0;
 error:
@@ -185,7 +185,7 @@ error:
                                        (*rxq->mprq_bufs)[i]);
                (*rxq->mprq_bufs)[i] = NULL;
        }
-       DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
+       DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything",
                rxq->port_id, rxq->idx);
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;
@@ -204,7 +204,9 @@ static int
 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
        const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
-       unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
+       unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
+               (1 << rxq_ctrl->rxq.elts_n) * (1 << rxq_ctrl->rxq.strd_num_n) :
+               (1 << rxq_ctrl->rxq.elts_n);
        unsigned int i;
        int err;
 
@@ -262,7 +264,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
                        (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
        }
        DRV_LOG(DEBUG,
-               "port %u Rx queue %u allocated and configured %u segments"
+               "port %u SPRQ queue %u allocated and configured %u segments"
                " (max %u packets)",
                PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
                elts_n / (1 << rxq_ctrl->rxq.sges_n));
@@ -275,7 +277,7 @@ error:
                        rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
                (*rxq_ctrl->rxq.elts)[i] = NULL;
        }
-       DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
+       DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything",
                PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;
@@ -293,8 +295,15 @@ error:
 int
 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-       return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
-              rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
+       int ret = 0;
+
+       /**
+        * For MPRQ we need to allocate both MPRQ buffers
+        * for WQEs and simple mbufs for vector processing.
+        */
+       if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
+               ret = rxq_alloc_elts_mprq(rxq_ctrl);
+       return (ret || rxq_alloc_elts_sprq(rxq_ctrl));
 }
 
 /**
@@ -309,11 +318,10 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
        struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
        uint16_t i;
 
-       DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-               rxq->port_id, rxq->idx);
+       DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %u WRs",
+               rxq->port_id, rxq->idx, (1u << rxq->elts_n));
        if (rxq->mprq_bufs == NULL)
                return;
-       MLX5_ASSERT(mlx5_rxq_check_vec_support(rxq) < 0);
        for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
                if ((*rxq->mprq_bufs)[i] != NULL)
                        mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
@@ -335,25 +343,27 @@ static void
 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
        struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
-       const uint16_t q_n = (1 << rxq->elts_n);
+       const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
+               (1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
+               (1 << rxq->elts_n);
        const uint16_t q_mask = q_n - 1;
        uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
        uint16_t i;
 
-       DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-               PORT_ID(rxq_ctrl->priv), rxq->idx);
+       DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs",
+               PORT_ID(rxq_ctrl->priv), rxq->idx, q_n);
        if (rxq->elts == NULL)
                return;
        /**
-        * Some mbuf in the Ring belongs to the application.  They cannot be
-        * freed.
+        * Some mbufs in the ring belong to the application.
+        * They cannot be freed.
         */
        if (mlx5_rxq_check_vec_support(rxq) > 0) {
                for (i = 0; i < used; ++i)
                        (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
                rxq->rq_pi = rxq->rq_ci;
        }
-       for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
+       for (i = 0; i != q_n; ++i) {
                if ((*rxq->elts)[i] != NULL)
                        rte_pktmbuf_free_seg((*rxq->elts)[i]);
                (*rxq->elts)[i] = NULL;
@@ -369,10 +379,13 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 static void
 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
+       /*
+        * For MPRQ we need to free both MPRQ buffers
+        * for WQEs and simple mbufs for vector processing.
+        */
        if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
                rxq_free_elts_mprq(rxq_ctrl);
-       else
-               rxq_free_elts_sprq(rxq_ctrl);
+       rxq_free_elts_sprq(rxq_ctrl);
 }
 
 /**
@@ -1334,20 +1347,10 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_rxq_ctrl *tmpl;
        unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-       unsigned int mprq_stride_nums;
-       unsigned int mprq_stride_size;
-       unsigned int mprq_stride_cap;
        struct mlx5_dev_config *config = &priv->config;
-       /*
-        * Always allocate extra slots, even if eventually
-        * the vector Rx will not be used.
-        */
-       uint16_t desc_n =
-               desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
        uint64_t offloads = conf->offloads |
                           dev->data->dev_conf.rxmode.offloads;
        unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
-       const int mprq_en = mlx5_check_mprq_support(dev) > 0;
        unsigned int max_rx_pkt_len = lro_on_queue ?
                        dev->data->dev_conf.rxmode.max_lro_pkt_size :
                        dev->data->dev_conf.rxmode.max_rx_pkt_len;
@@ -1355,6 +1358,21 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                                                        RTE_PKTMBUF_HEADROOM;
        unsigned int max_lro_size = 0;
        unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
+       const int mprq_en = mlx5_check_mprq_support(dev) > 0;
+       unsigned int mprq_stride_nums = config->mprq.stride_num_n ?
+               config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
+       unsigned int mprq_stride_size = non_scatter_min_mbuf_size <=
+               (1U << config->mprq.max_stride_size_n) ?
+               log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+       unsigned int mprq_stride_cap = (config->mprq.stride_num_n ?
+               (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
+               (config->mprq.stride_size_n ?
+               (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
+       /*
+        * Always allocate extra slots, even if eventually
+        * the vector Rx will not be used.
+        */
+       uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 
        if (non_scatter_min_mbuf_size > mb_len && !(offloads &
                                                    DEV_RX_OFFLOAD_SCATTER)) {
@@ -1366,8 +1384,11 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                rte_errno = ENOSPC;
                return NULL;
        }
-       tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
-                          desc_n * sizeof(struct rte_mbuf *), 0, socket);
+       tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+               sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *) +
+               (desc >> mprq_stride_nums) * sizeof(struct mlx5_mprq_buf *),
+               0, socket);
+
        if (!tmpl) {
                rte_errno = ENOMEM;
                return NULL;
@@ -1381,15 +1402,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        tmpl->socket = socket;
        if (dev->data->dev_conf.intr_conf.rxq)
                tmpl->irq = 1;
-       mprq_stride_nums = config->mprq.stride_num_n ?
-               config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
-       mprq_stride_size = non_scatter_min_mbuf_size <=
-               (1U << config->mprq.max_stride_size_n) ?
-               log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
-       mprq_stride_cap = (config->mprq.stride_num_n ?
-               (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
-                       (config->mprq.stride_size_n ?
-               (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
        /*
         * This Rx queue can be configured as a Multi-Packet RQ if all of the
         * following conditions are met:
@@ -1543,9 +1555,11 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        tmpl->rxq.mp = mp;
        tmpl->rxq.elts_n = log2above(desc);
        tmpl->rxq.rq_repl_thresh =
-               MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
+               MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n);
        tmpl->rxq.elts =
-               (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+               (struct rte_mbuf *(*)[desc_n])(tmpl + 1);
+       tmpl->rxq.mprq_bufs =
+               (struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n);
 #ifndef RTE_ARCH_64
        tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
 #endif
index b577aab..f121ed7 100644 (file)
 #include <mlx5_prm.h>
 #include <mlx5_common.h>
 
+#include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_mr.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
-#include "mlx5_autoconf.h"
 
 /* TX burst subroutines return codes. */
 enum mlx5_txcmp_code {
@@ -93,10 +93,6 @@ static __rte_always_inline void
 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
               volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);
 
-static __rte_always_inline void
-mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
-                const unsigned int strd_n);
-
 static int
 mlx5_queue_state_modify(struct rte_eth_dev *dev,
                        struct mlx5_mp_arg_queue_state_modify *sm);
@@ -584,7 +580,14 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
                       struct rte_eth_burst_mode *mode)
 {
        eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_rxq_data *rxq;
 
+       rxq = (*priv->rxqs)[rx_queue_id];
+       if (!rxq) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
        if (pkt_burst == mlx5_rx_burst) {
                snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
        } else if (pkt_burst == mlx5_rx_burst_mprq) {
@@ -598,6 +601,16 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
                snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
 #else
                return -EINVAL;
+#endif
+       } else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
+#if defined RTE_ARCH_X86_64
+               snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
+#elif defined RTE_ARCH_ARM64
+               snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
+#elif defined RTE_ARCH_PPC_64
+               snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
+#else
+               return -EINVAL;
 #endif
        } else {
                return -EINVAL;
@@ -866,6 +879,8 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
        rxq->zip = (struct rxq_zip){
                .ai = 0,
        };
+       rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
+               (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
        /* Update doorbell counter. */
        rxq->rq_ci = wqe_n >> rxq->sges_n;
        rte_io_wmb();
@@ -969,7 +984,8 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
 {
        const uint16_t cqe_n = 1 << rxq->cqe_n;
        const uint16_t cqe_mask = cqe_n - 1;
-       const unsigned int wqe_n = 1 << rxq->elts_n;
+       const uint16_t wqe_n = 1 << rxq->elts_n;
+       const uint16_t strd_n = 1 << rxq->strd_num_n;
        struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of(rxq, struct mlx5_rxq_ctrl, rxq);
        union {
@@ -1033,21 +1049,27 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
                                                    &sm))
                                return -1;
                        if (vec) {
-                               const uint16_t q_mask = wqe_n - 1;
-                               uint16_t elt_idx;
+                               const uint32_t elts_n =
+                                       mlx5_rxq_mprq_enabled(rxq) ?
+                                       wqe_n * strd_n : wqe_n;
+                               const uint32_t e_mask = elts_n - 1;
+                               uint32_t elts_ci =
+                                       mlx5_rxq_mprq_enabled(rxq) ?
+                                       rxq->elts_ci : rxq->rq_ci;
+                               uint32_t elt_idx;
                                struct rte_mbuf **elt;
                                int i;
-                               unsigned int n = wqe_n - (rxq->rq_ci -
+                               unsigned int n = elts_n - (elts_ci -
                                                          rxq->rq_pi);
 
                                for (i = 0; i < (int)n; ++i) {
-                                       elt_idx = (rxq->rq_ci + i) & q_mask;
+                                       elt_idx = (elts_ci + i) & e_mask;
                                        elt = &(*rxq->elts)[elt_idx];
                                        *elt = rte_mbuf_raw_alloc(rxq->mp);
                                        if (!*elt) {
                                                for (i--; i >= 0; --i) {
-                                                       elt_idx = (rxq->rq_ci +
-                                                                  i) & q_mask;
+                                                       elt_idx = (elts_ci +
+                                                                  i) & e_mask;
                                                        elt = &(*rxq->elts)
                                                                [elt_idx];
                                                        rte_pktmbuf_free_seg
@@ -1056,7 +1078,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
                                                return -1;
                                        }
                                }
-                               for (i = 0; i < (int)wqe_n; ++i) {
+                               for (i = 0; i < (int)elts_n; ++i) {
                                        elt = &(*rxq->elts)[i];
                                        DATA_LEN(*elt) =
                                                (uint16_t)((*elt)->buf_len -
@@ -1064,7 +1086,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
                                }
                                /* Padding with a fake mbuf for vec Rx. */
                                for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
-                                       (*rxq->elts)[wqe_n + i] =
+                                       (*rxq->elts)[elts_n + i] =
                                                                &rxq->fake_mbuf;
                        }
                        mlx5_rxq_initialize(rxq);
@@ -1545,31 +1567,6 @@ mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
        mlx5_mprq_buf_free_cb(NULL, buf);
 }
 
-static inline void
-mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
-                const unsigned int strd_n)
-{
-       struct mlx5_mprq_buf *rep = rxq->mprq_repl;
-       volatile struct mlx5_wqe_data_seg *wqe =
-               &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
-       void *addr;
-
-       MLX5_ASSERT(rep != NULL);
-       /* Replace MPRQ buf. */
-       (*rxq->mprq_bufs)[rq_idx] = rep;
-       /* Replace WQE. */
-       addr = mlx5_mprq_buf_addr(rep, strd_n);
-       wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
-       /* If there's only one MR, no need to replace LKey in WQE. */
-       if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
-               wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
-       /* Stash a mbuf for next replacement. */
-       if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
-               rxq->mprq_repl = rep;
-       else
-               rxq->mprq_repl = NULL;
-}
-
 /**
  * DPDK callback for RX with Multi-Packet RQ support.
  *
@@ -1587,12 +1584,9 @@ uint16_t
 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct mlx5_rxq_data *rxq = dpdk_rxq;
-       const unsigned int strd_n = 1 << rxq->strd_num_n;
-       const unsigned int strd_sz = 1 << rxq->strd_sz_n;
-       const unsigned int strd_shift =
-               MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
-       const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
-       const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
+       const uint32_t strd_n = 1 << rxq->strd_num_n;
+       const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
+       const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
        volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
        unsigned int i = 0;
        uint32_t rq_ci = rxq->rq_ci;
@@ -1601,37 +1595,18 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 
        while (i < pkts_n) {
                struct rte_mbuf *pkt;
-               void *addr;
                int ret;
                uint32_t len;
                uint16_t strd_cnt;
                uint16_t strd_idx;
-               uint32_t offset;
                uint32_t byte_cnt;
-               int32_t hdrm_overlap;
                volatile struct mlx5_mini_cqe8 *mcqe = NULL;
                uint32_t rss_hash_res = 0;
+               enum mlx5_rqx_code rxq_code;
 
                if (consumed_strd == strd_n) {
-                       /* Replace WQE only if the buffer is still in use. */
-                       if (__atomic_load_n(&buf->refcnt,
-                                           __ATOMIC_RELAXED) > 1) {
-                               mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n);
-                               /* Release the old buffer. */
-                               mlx5_mprq_buf_free(buf);
-                       } else if (unlikely(rxq->mprq_repl == NULL)) {
-                               struct mlx5_mprq_buf *rep;
-
-                               /*
-                                * Currently, the MPRQ mempool is out of buffer
-                                * and doing memcpy regardless of the size of Rx
-                                * packet. Retry allocation to get back to
-                                * normal.
-                                */
-                               if (!rte_mempool_get(rxq->mprq_mp,
-                                                    (void **)&rep))
-                                       rxq->mprq_repl = rep;
-                       }
+                       /* Replace WQE if the buffer is still in use. */
+                       mprq_buf_replace(rxq, rq_ci & wq_mask);
                        /* Advance to the next WQE. */
                        consumed_strd = 0;
                        ++rq_ci;
@@ -1667,122 +1642,23 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
                if (rxq->crc_present)
                        len -= RTE_ETHER_CRC_LEN;
-               offset = strd_idx * strd_sz + strd_shift;
-               addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
-               hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
-               /*
-                * Memcpy packets to the target mbuf if:
-                * - The size of packet is smaller than mprq_max_memcpy_len.
-                * - Out of buffer in the Mempool for Multi-Packet RQ.
-                * - The packet's stride overlaps a headroom and scatter is off.
-                */
-               if (len <= rxq->mprq_max_memcpy_len ||
-                   rxq->mprq_repl == NULL ||
-                   (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
-                       if (likely(rte_pktmbuf_tailroom(pkt) >= len)) {
-                               rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
-                                          addr, len);
-                               DATA_LEN(pkt) = len;
-                       } else if (rxq->strd_scatter_en) {
-                               struct rte_mbuf *prev = pkt;
-                               uint32_t seg_len =
-                                       RTE_MIN(rte_pktmbuf_tailroom(pkt), len);
-                               uint32_t rem_len = len - seg_len;
-
-                               rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
-                                          addr, seg_len);
-                               DATA_LEN(pkt) = seg_len;
-                               while (rem_len) {
-                                       struct rte_mbuf *next =
-                                               rte_pktmbuf_alloc(rxq->mp);
-
-                                       if (unlikely(next == NULL)) {
-                                               rte_pktmbuf_free(pkt);
-                                               ++rxq->stats.rx_nombuf;
-                                               goto out;
-                                       }
-                                       NEXT(prev) = next;
-                                       SET_DATA_OFF(next, 0);
-                                       addr = RTE_PTR_ADD(addr, seg_len);
-                                       seg_len = RTE_MIN
-                                               (rte_pktmbuf_tailroom(next),
-                                                rem_len);
-                                       rte_memcpy
-                                               (rte_pktmbuf_mtod(next, void *),
-                                                addr, seg_len);
-                                       DATA_LEN(next) = seg_len;
-                                       rem_len -= seg_len;
-                                       prev = next;
-                                       ++NB_SEGS(pkt);
-                               }
-                       } else {
-                               rte_pktmbuf_free_seg(pkt);
+               rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
+                                          strd_idx, strd_cnt);
+               if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
+                       rte_pktmbuf_free_seg(pkt);
+                       if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
                                ++rxq->stats.idropped;
                                continue;
                        }
-               } else {
-                       rte_iova_t buf_iova;
-                       struct rte_mbuf_ext_shared_info *shinfo;
-                       uint16_t buf_len = strd_cnt * strd_sz;
-                       void *buf_addr;
-
-                       /* Increment the refcnt of the whole chunk. */
-                       __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
-                       MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
-                                   __ATOMIC_RELAXED) <= strd_n + 1);
-                       buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
-                       /*
-                        * MLX5 device doesn't use iova but it is necessary in a
-                        * case where the Rx packet is transmitted via a
-                        * different PMD.
-                        */
-                       buf_iova = rte_mempool_virt2iova(buf) +
-                                  RTE_PTR_DIFF(buf_addr, buf);
-                       shinfo = &buf->shinfos[strd_idx];
-                       rte_mbuf_ext_refcnt_set(shinfo, 1);
-                       /*
-                        * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
-                        * attaching the stride to mbuf and more offload flags
-                        * will be added below by calling rxq_cq_to_mbuf().
-                        * Other fields will be overwritten.
-                        */
-                       rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
-                                                 buf_len, shinfo);
-                       /* Set mbuf head-room. */
-                       SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
-                       MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
-                       MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
-                               len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
-                       DATA_LEN(pkt) = len;
-                       /*
-                        * Copy the last fragment of a packet (up to headroom
-                        * size bytes) in case there is a stride overlap with
-                        * a next packet's headroom. Allocate a separate mbuf
-                        * to store this fragment and link it. Scatter is on.
-                        */
-                       if (hdrm_overlap > 0) {
-                               MLX5_ASSERT(rxq->strd_scatter_en);
-                               struct rte_mbuf *seg =
-                                       rte_pktmbuf_alloc(rxq->mp);
-
-                               if (unlikely(seg == NULL)) {
-                                       rte_pktmbuf_free_seg(pkt);
-                                       ++rxq->stats.rx_nombuf;
-                                       break;
-                               }
-                               SET_DATA_OFF(seg, 0);
-                               rte_memcpy(rte_pktmbuf_mtod(seg, void *),
-                                       RTE_PTR_ADD(addr, len - hdrm_overlap),
-                                       hdrm_overlap);
-                               DATA_LEN(seg) = hdrm_overlap;
-                               DATA_LEN(pkt) = len - hdrm_overlap;
-                               NEXT(pkt) = seg;
-                               NB_SEGS(pkt) = 2;
+                       if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
+                               ++rxq->stats.rx_nombuf;
+                               break;
                        }
                }
                rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
                if (cqe->lro_num_seg > 1) {
-                       mlx5_lro_update_hdr(addr, cqe, len);
+                       mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
+                                           cqe, len);
                        pkt->ol_flags |= PKT_RX_LRO;
                        pkt->tso_segsz = len / cqe->lro_num_seg;
                }
@@ -1796,7 +1672,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                *(pkts++) = pkt;
                ++i;
        }
-out:
        /* Update the consumer indexes. */
        rxq->consumed_strd = consumed_strd;
        rte_io_wmb();
@@ -1878,6 +1753,14 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
+__rte_weak uint16_t
+mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused,
+                      struct rte_mbuf **pkts __rte_unused,
+                      uint16_t pkts_n __rte_unused)
+{
+       return 0;
+}
+
 __rte_weak int
 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
 {
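
The burst-mode strings added above are visible to applications through
the standard ethdev query. A minimal sketch, assuming port 0 and
queue 0 are already configured and started:

    #include <stdio.h>
    #include <rte_ethdev.h>

    static void
    print_rx_burst_mode(uint16_t port_id, uint16_t queue_id)
    {
            struct rte_eth_burst_mode mode;

            /* mode.info is filled with e.g. "MPRQ Vector SSE" on x86_64. */
            if (rte_eth_rx_burst_mode_get(port_id, queue_id, &mode) == 0)
                    printf("port %u rxq %u burst mode: %s\n",
                           port_id, queue_id, mode.info);
    }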
index bcb1595..7e642ea 100644 (file)
@@ -30,6 +30,7 @@
 #include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
+#include "mlx5_mr.h"
 
 /* Support tunnel matching. */
 #define MLX5_FLOW_TUNNEL 10
@@ -94,6 +95,12 @@ enum mlx5_rxq_err_state {
        MLX5_RXQ_ERR_STATE_NEED_READY,
 };
 
+enum mlx5_rqx_code {
+       MLX5_RXQ_CODE_EXIT = 0,
+       MLX5_RXQ_CODE_NOMBUF,
+       MLX5_RXQ_CODE_DROPPED,
+};
+
 /* RX queue descriptor. */
 struct mlx5_rxq_data {
        unsigned int csum:1; /* Enable checksum offloading. */
@@ -116,6 +123,7 @@ struct mlx5_rxq_data {
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
        uint16_t port_id;
+       uint32_t elts_ci;
        uint32_t rq_ci;
        uint16_t consumed_strd; /* Number of consumed strides in WQE. */
        uint32_t rq_pi;
@@ -130,11 +138,8 @@ struct mlx5_rxq_data {
        uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
        volatile void *wqes;
        volatile struct mlx5_cqe(*cqes)[];
-       RTE_STD_C11
-       union  {
-               struct rte_mbuf *(*elts)[];
-               struct mlx5_mprq_buf *(*mprq_bufs)[];
-       };
+       struct rte_mbuf *(*elts)[];
+       struct mlx5_mprq_buf *(*mprq_bufs)[];
        struct rte_mempool *mp;
        struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
        struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
@@ -423,6 +428,8 @@ int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
 int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
 uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
                           uint16_t pkts_n);
+uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_txq, struct rte_mbuf **pkts,
+                               uint16_t pkts_n);
 
 /* mlx5_mr.c */
 
@@ -700,4 +707,187 @@ mlx5_timestamp_set(struct rte_mbuf *mbuf, int offset,
        *RTE_MBUF_DYNFIELD(mbuf, offset, rte_mbuf_timestamp_t *) = timestamp;
 }
 
+/**
+ * Replace MPRQ buffer.
+ *
+ * @param rxq
+ *   Pointer to Rx queue structure.
+ * @param rq_idx
+ *   RQ index to replace.
+ */
+static __rte_always_inline void
+mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
+{
+       const uint32_t strd_n = 1 << rxq->strd_num_n;
+       struct mlx5_mprq_buf *rep = rxq->mprq_repl;
+       volatile struct mlx5_wqe_data_seg *wqe =
+               &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
+       struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_idx];
+       void *addr;
+
+       if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) > 1) {
+               MLX5_ASSERT(rep != NULL);
+               /* Replace MPRQ buf. */
+               (*rxq->mprq_bufs)[rq_idx] = rep;
+               /* Replace WQE. */
+               addr = mlx5_mprq_buf_addr(rep, strd_n);
+               wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
+               /* If there's only one MR, no need to replace LKey in WQE. */
+               if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
+                       wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
+               /* Stash a mbuf for next replacement. */
+               if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
+                       rxq->mprq_repl = rep;
+               else
+                       rxq->mprq_repl = NULL;
+               /* Release the old buffer. */
+               mlx5_mprq_buf_free(buf);
+       } else if (unlikely(rxq->mprq_repl == NULL)) {
+               struct mlx5_mprq_buf *rep;
+
+               /*
+                * Currently, the MPRQ mempool is out of buffers
+                * and we are doing memcpy regardless of the Rx
+                * packet size. Retry the allocation to get back
+                * to normal.
+                */
+               if (!rte_mempool_get(rxq->mprq_mp, (void **)&rep))
+                       rxq->mprq_repl = rep;
+       }
+}
+
+/**
+ * Attach or copy MPRQ buffer content to a packet.
+ *
+ * @param rxq
+ *   Pointer to Rx queue structure.
+ * @param pkt
+ *   Pointer to a packet to fill.
+ * @param len
+ *   Packet length.
+ * @param buf
+ *   Pointer to a MPRQ buffer to take the data from.
+ * @param strd_idx
+ *   Stride index to start from.
+ * @param strd_cnt
+ *   Number of strides to consume.
+ *
+ * @return
+ *   MLX5_RXQ_CODE_EXIT on success,
+ *   MLX5_RXQ_CODE_NOMBUF if an mbuf cannot be allocated,
+ *   MLX5_RXQ_CODE_DROPPED if the packet should be dropped.
+ */
+static __rte_always_inline enum mlx5_rqx_code
+mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, uint32_t len,
+               struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t strd_cnt)
+{
+       const uint32_t strd_n = 1 << rxq->strd_num_n;
+       const uint16_t strd_sz = 1 << rxq->strd_sz_n;
+       const uint16_t strd_shift =
+               MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
+       const int32_t hdrm_overlap =
+               len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
+       const uint32_t offset = strd_idx * strd_sz + strd_shift;
+       void *addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+
+       /*
+        * Memcpy packets to the target mbuf if:
+        * - The size of packet is smaller than mprq_max_memcpy_len.
+        * - Out of buffer in the Mempool for Multi-Packet RQ.
+        * - The packet's stride overlaps a headroom and scatter is off.
+        */
+       if (len <= rxq->mprq_max_memcpy_len ||
+           rxq->mprq_repl == NULL ||
+           (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
+               if (likely(len <=
+                          (uint32_t)(pkt->buf_len - RTE_PKTMBUF_HEADROOM))) {
+                       rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
+                                  addr, len);
+                       DATA_LEN(pkt) = len;
+               } else if (rxq->strd_scatter_en) {
+                       struct rte_mbuf *prev = pkt;
+                       uint32_t seg_len = RTE_MIN(len, (uint32_t)
+                               (pkt->buf_len - RTE_PKTMBUF_HEADROOM));
+                       uint32_t rem_len = len - seg_len;
+
+                       rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
+                                  addr, seg_len);
+                       DATA_LEN(pkt) = seg_len;
+                       while (rem_len) {
+                               struct rte_mbuf *next =
+                                       rte_pktmbuf_alloc(rxq->mp);
+
+                               if (unlikely(next == NULL))
+                                       return MLX5_RXQ_CODE_NOMBUF;
+                               NEXT(prev) = next;
+                               SET_DATA_OFF(next, 0);
+                               addr = RTE_PTR_ADD(addr, seg_len);
+                               seg_len = RTE_MIN(rem_len, (uint32_t)
+                                       (next->buf_len - RTE_PKTMBUF_HEADROOM));
+                               rte_memcpy
+                                       (rte_pktmbuf_mtod(next, void *),
+                                        addr, seg_len);
+                               DATA_LEN(next) = seg_len;
+                               rem_len -= seg_len;
+                               prev = next;
+                               ++NB_SEGS(pkt);
+                       }
+               } else {
+                       return MLX5_RXQ_CODE_DROPPED;
+               }
+       } else {
+               rte_iova_t buf_iova;
+               struct rte_mbuf_ext_shared_info *shinfo;
+               uint16_t buf_len = strd_cnt * strd_sz;
+               void *buf_addr;
+
+               /* Increment the refcnt of the whole chunk. */
+               __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
+               MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
+                           __ATOMIC_RELAXED) <= strd_n + 1);
+               buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
+               /*
+                * The MLX5 device doesn't use the IOVA itself, but it is
+                * necessary in case the Rx packet is transmitted via a
+                * different PMD.
+                */
+               buf_iova = rte_mempool_virt2iova(buf) +
+                          RTE_PTR_DIFF(buf_addr, buf);
+               shinfo = &buf->shinfos[strd_idx];
+               rte_mbuf_ext_refcnt_set(shinfo, 1);
+               /*
+                * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
+                * attaching the stride to mbuf and more offload flags
+                * will be added below by calling rxq_cq_to_mbuf().
+                * Other fields will be overwritten.
+                */
+               rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
+                                         buf_len, shinfo);
+               /* Set mbuf head-room. */
+               SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
+               MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
+               MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
+                       len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
+               DATA_LEN(pkt) = len;
+               /*
+                * Copy the last fragment of a packet (up to headroom
+                * size bytes) in case there is a stride overlap with
+                * a next packet's headroom. Allocate a separate mbuf
+                * to store this fragment and link it. Scatter is on.
+                */
+               if (hdrm_overlap > 0) {
+                       MLX5_ASSERT(rxq->strd_scatter_en);
+                       struct rte_mbuf *seg =
+                               rte_pktmbuf_alloc(rxq->mp);
+
+                       if (unlikely(seg == NULL))
+                               return MLX5_RXQ_CODE_NOMBUF;
+                       SET_DATA_OFF(seg, 0);
+                       rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+                               RTE_PTR_ADD(addr, len - hdrm_overlap),
+                               hdrm_overlap);
+                       DATA_LEN(seg) = hdrm_overlap;
+                       DATA_LEN(pkt) = len - hdrm_overlap;
+                       NEXT(pkt) = seg;
+                       NB_SEGS(pkt) = 2;
+               }
+       }
+       return MLX5_RXQ_CODE_EXIT;
+}
+
 #endif /* RTE_PMD_MLX5_RXTX_H_ */
index aa48775..469ea84 100644 (file)
@@ -77,6 +77,177 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
        return n;
 }
 
+/**
+ * Replenish buffers for RX in bulk.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ */
+static inline void
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
+{
+       const uint16_t q_n = 1 << rxq->elts_n;
+       const uint16_t q_mask = q_n - 1;
+       uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
+       uint16_t elts_idx = rxq->rq_ci & q_mask;
+       struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+       volatile struct mlx5_wqe_data_seg *wq =
+               &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
+       unsigned int i;
+
+       if (n >= rxq->rq_repl_thresh) {
+               MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
+               MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
+                           MLX5_VPMD_DESCS_PER_LOOP);
+               /* Not to cross queue end. */
+               n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+               if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+                       rxq->stats.rx_nombuf += n;
+                       return;
+               }
+               for (i = 0; i < n; ++i) {
+                       void *buf_addr;
+
+                       /*
+                        * In order to support the mbufs with external attached
+                        * data buffer we should use the buf_addr pointer
+                        * instead of rte_mbuf_buf_addr(). It touches the mbuf
+                        * itself and may impact the performance.
+                        */
+                       buf_addr = elts[i]->buf_addr;
+                       wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+                                                     RTE_PKTMBUF_HEADROOM);
+                       /* If there's a single MR, no need to replace LKey. */
+                       if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
+                                    > 1))
+                               wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+               }
+               rxq->rq_ci += n;
+               /* Prevent overflowing into consumed mbufs. */
+               elts_idx = rxq->rq_ci & q_mask;
+               for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+                       (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+               rte_io_wmb();
+               *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+       }
+}
+
+/**
+ * Replenish buffers for MPRQ RX in bulk.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ */
+static inline void
+mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
+{
+       const uint16_t wqe_n = 1 << rxq->elts_n;
+       const uint32_t strd_n = 1 << rxq->strd_num_n;
+       const uint32_t elts_n = wqe_n * strd_n;
+       const uint32_t wqe_mask = elts_n - 1;
+       uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
+       uint32_t elts_idx = rxq->elts_ci & wqe_mask;
+       struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+
+       if (n >= rxq->rq_repl_thresh) {
+               MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
+               MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
+                            MLX5_VPMD_DESCS_PER_LOOP);
+               /* Not to cross queue end. */
+               n = RTE_MIN(n, elts_n - elts_idx);
+               if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+                       rxq->stats.rx_nombuf += n;
+                       return;
+               }
+               rxq->elts_ci += n;
+       }
+}
+
+/**
+ * Copy or attach MPRQ buffers to RX SW ring.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param pkts
+ *   Pointer to array of packets to be stored.
+ * @param pkts_n
+ *   Number of packets to be stored.
+ *
+ * @return
+ *   Number of packets successfully copied/attached (<= pkts_n).
+ */
+static inline uint16_t
+rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
+                    struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+       const uint16_t wqe_n = 1 << rxq->elts_n;
+       const uint16_t wqe_mask = wqe_n - 1;
+       const uint16_t strd_sz = 1 << rxq->strd_sz_n;
+       const uint32_t strd_n = 1 << rxq->strd_num_n;
+       const uint32_t elts_n = wqe_n * strd_n;
+       const uint32_t elts_mask = elts_n - 1;
+       uint32_t elts_idx = rxq->rq_pi & elts_mask;
+       struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+       uint32_t rq_ci = rxq->rq_ci;
+       struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+       uint16_t copied = 0;
+       uint16_t i = 0;
+
+       for (i = 0; i < pkts_n; ++i) {
+               uint16_t strd_cnt;
+               enum mlx5_rqx_code rxq_code;
+
+               if (rxq->consumed_strd == strd_n) {
+                       /* Replace WQE if the buffer is still in use. */
+                       mprq_buf_replace(rxq, rq_ci & wqe_mask);
+                       /* Advance to the next WQE. */
+                       rxq->consumed_strd = 0;
+                       rq_ci++;
+                       buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+               }
+
+               if (!elts[i]->pkt_len) {
+                       rxq->consumed_strd = strd_n;
+                       rte_pktmbuf_free_seg(elts[i]);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+                       rxq->stats.ipackets -= 1;
+#endif
+                       continue;
+               }
+               strd_cnt = (elts[i]->pkt_len / strd_sz) +
+                          ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
+               rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
+                                          buf, rxq->consumed_strd, strd_cnt);
+               rxq->consumed_strd += strd_cnt;
+               if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+                       /* Adjust the counters before the mbuf is freed. */
+                       rxq->stats.ipackets -= 1;
+                       rxq->stats.ibytes -= elts[i]->pkt_len;
+#endif
+                       rte_pktmbuf_free_seg(elts[i]);
+                       if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
+                               ++rxq->stats.rx_nombuf;
+                               break;
+                       }
+                       if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
+                               ++rxq->stats.idropped;
+                               continue;
+                       }
+               }
+               pkts[copied++] = elts[i];
+       }
+       rxq->rq_pi += i;
+       rxq->cq_ci += i;
+       rte_io_wmb();
+       *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+       if (rq_ci != rxq->rq_ci) {
+               rxq->rq_ci = rq_ci;
+               rte_io_wmb();
+               *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+       }
+       return copied;
+}
+
 /**
  * Receive burst of packets. An errored completion also consumes a mbuf, but the
  * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
@@ -204,7 +375,142 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        bool no_cq = false;
 
        do {
-               nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq);
+               nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
+                                   &err, &no_cq);
+               if (unlikely(err | rxq->err_state))
+                       nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
+               tn += nb_rx;
+               if (unlikely(no_cq))
+                       break;
+       } while (tn != pkts_n);
+       return tn;
+}
+
+/**
+ * Receive burst of packets. An errored completion also consumes a mbuf, but the
+ * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
+ * before returning to application.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
+ * @param[out] err
+ *   Pointer to a flag. Set non-zero value if pkts array has at least one error
+ *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
+ *
+ * @return
+ *   Number of packets received including errors (<= pkts_n).
+ */
+static inline uint16_t
+rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+                uint16_t pkts_n, uint64_t *err, bool *no_cq)
+{
+       const uint16_t q_n = 1 << rxq->cqe_n;
+       const uint16_t q_mask = q_n - 1;
+       const uint16_t wqe_n = 1 << rxq->elts_n;
+       const uint32_t strd_n = 1 << rxq->strd_num_n;
+       const uint32_t elts_n = wqe_n * strd_n;
+       const uint32_t elts_mask = elts_n - 1;
+       volatile struct mlx5_cqe *cq;
+       struct rte_mbuf **elts;
+       uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+       uint16_t nocmp_n = 0;
+       uint16_t rcvd_pkt = 0;
+       uint16_t cp_pkt = 0;
+       unsigned int cq_idx = rxq->cq_ci & q_mask;
+       unsigned int elts_idx;
+
+       MLX5_ASSERT(rxq->sges_n == 0);
+       cq = &(*rxq->cqes)[cq_idx];
+       rte_prefetch0(cq);
+       rte_prefetch0(cq + 1);
+       rte_prefetch0(cq + 2);
+       rte_prefetch0(cq + 3);
+       pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+       mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
+       /* See if there're unreturned mbufs from compressed CQE. */
+       rcvd_pkt = rxq->decompressed;
+       if (rcvd_pkt > 0) {
+               rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+               cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
+               rxq->decompressed -= rcvd_pkt;
+               pkts += cp_pkt;
+       }
+       elts_idx = rxq->rq_pi & elts_mask;
+       elts = &(*rxq->elts)[elts_idx];
+       /* Not to overflow pkts array. */
+       pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+       /* Not to cross queue end. */
+       pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
+       pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
+       /* Not to move past the allocated mbufs. */
+       pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
+       if (!pkts_n) {
+               *no_cq = !cp_pkt;
+               return cp_pkt;
+       }
+       /* At this point, there shouldn't be any remaining packets. */
+       MLX5_ASSERT(rxq->decompressed == 0);
+       /* Process all the CQEs */
+       nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
+       /* If no new CQE seen, return without updating cq_db. */
+       if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+               *no_cq = true;
+               return cp_pkt;
+       }
+       /* Update the consumer indexes for non-compressed CQEs. */
+       MLX5_ASSERT(nocmp_n <= pkts_n);
+       cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
+       rcvd_pkt += cp_pkt;
+       /* Decompress the last CQE if compressed. */
+       if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
+               MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+               rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
+                                                       &elts[nocmp_n]);
+               /* Return more packets if needed. */
+               if (nocmp_n < pkts_n) {
+                       uint16_t n = rxq->decompressed;
+
+                       n = RTE_MIN(n, pkts_n - nocmp_n);
+                       cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
+                       rcvd_pkt += cp_pkt;
+                       rxq->decompressed -= n;
+               }
+       }
+       *no_cq = !rcvd_pkt;
+       return rcvd_pkt;
+}
+
+/**
+ * DPDK callback for vectorized MPRQ RX.
+ *
+ * @param dpdk_rxq
+ *   Generic pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+       struct mlx5_rxq_data *rxq = dpdk_rxq;
+       uint16_t nb_rx = 0;
+       uint16_t tn = 0;
+       uint64_t err = 0;
+       bool no_cq = false;
+
+       do {
+               nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
+                                        &err, &no_cq);
                if (unlikely(err | rxq->err_state))
                        nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
                tn += nb_rx;
@@ -229,8 +535,6 @@ mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
        struct mlx5_rxq_ctrl *ctrl =
                container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
-       if (mlx5_mprq_enabled(ETH_DEV(ctrl->priv)))
-               return -ENOTSUP;
        if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
                return -ENOTSUP;
        if (rxq->lro)
@@ -257,8 +561,6 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
                return -ENOTSUP;
        if (!priv->config.rx_vec_en)
                return -ENOTSUP;
-       if (mlx5_mprq_enabled(dev))
-               return -ENOTSUP;
        /* All the configured queues should support. */
        for (i = 0; i < priv->rxqs_n; ++i) {
                struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
index ce27074..93b4f51 100644 (file)
@@ -12,7 +12,6 @@
 #include <mlx5_prm.h>
 
 #include "mlx5_autoconf.h"
-
 #include "mlx5_mr.h"
 
 /* HW checksum offload capabilities of vectorized Tx. */
@@ -68,59 +67,4 @@ S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, sop_drop_qpn) ==
 S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==
                  offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);
 
-/**
- * Replenish buffers for RX in bulk.
- *
- * @param rxq
- *   Pointer to RX queue structure.
- */
-static inline void
-mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
-{
-       const uint16_t q_n = 1 << rxq->elts_n;
-       const uint16_t q_mask = q_n - 1;
-       uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
-       uint16_t elts_idx = rxq->rq_ci & q_mask;
-       struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
-       volatile struct mlx5_wqe_data_seg *wq =
-               &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
-       unsigned int i;
-
-       if (n >= rxq->rq_repl_thresh) {
-               MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
-               MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
-                           MLX5_VPMD_DESCS_PER_LOOP);
-               /* Not to cross queue end. */
-               n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
-               if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
-                       rxq->stats.rx_nombuf += n;
-                       return;
-               }
-               for (i = 0; i < n; ++i) {
-                       void *buf_addr;
-
-                       /*
-                        * In order to support the mbufs with external attached
-                        * data buffer we should use the buf_addr pointer
-                        * instead of rte_mbuf_buf_addr(). It touches the mbuf
-                        * itself and may impact the performance.
-                        */
-                       buf_addr = elts[i]->buf_addr;
-                       wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
-                                                     RTE_PKTMBUF_HEADROOM);
-                       /* If there's a single MR, no need to replace LKey. */
-                       if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
-                                    > 1))
-                               wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
-               }
-               rxq->rq_ci += n;
-               /* Prevent overflowing into consumed mbufs. */
-               elts_idx = rxq->rq_ci & q_mask;
-               for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
-                       (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
-               rte_io_wmb();
-               *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
-       }
-}
-
 #endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */