X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec.c;h=469ea8401da86fc1b973dcd9112008d831973214;hb=1be514fbcea9e8964296b46c91dbb56715503ae7;hp=edc663815b41a765c6aab7013d7b77de02438d11;hpb=3c2ddbd413e34698b66df65a57b0c760ffe0b3d6;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index edc663815b..469ea8401d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -1,225 +1,257 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright 2017 6WIND S.A.
- *   Copyright 2017 Mellanox.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of 6WIND S.A. nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
  */
 
-#include <assert.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdlib.h>
 
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#include <infiniband/mlx5dv.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
 #include <rte_prefetch.h>
+#include <rte_vect.h>
 
+#include <mlx5_glue.h>
+#include <mlx5_prm.h>
+
+#include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_rxtx_vec.h"
 #include "mlx5_autoconf.h"
-#include "mlx5_defs.h"
-#include "mlx5_prm.h"
 
-#ifdef RTE_ARCH_X86_64
+#if defined RTE_ARCH_X86_64
 #include "mlx5_rxtx_vec_sse.h"
+#elif defined RTE_ARCH_ARM64
+#include "mlx5_rxtx_vec_neon.h"
+#elif defined RTE_ARCH_PPC_64
+#include "mlx5_rxtx_vec_altivec.h"
 #else
 #error "This should not be compiled if SIMD instructions are not supported."
 #endif
 
 /**
- * Count the number of continuous single segment packets.
+ * Free error packets (all packets if rxq->err_state is set) and compact pkts.
  *
- * @param pkts
- *   Pointer to array of packets.
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array of received packets; packets that are kept move to the front.
  * @param pkts_n
- *   Number of packets.
+ *   Number of packets in array.
  *
  * @return
- *   Number of continuous single segment packets.
+ *   Number of packets kept in array after errors are freed (<= pkts_n).
  */
-static inline unsigned int
-txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
+static uint16_t
+rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+			 uint16_t pkts_n)
 {
-	unsigned int pos;
+	uint16_t n = 0;
+	unsigned int i;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+	uint32_t err_bytes = 0;
+#endif
 
-	if (!pkts_n)
-		return 0;
-	/* Count the number of continuous single segment packets. */
-	for (pos = 0; pos < pkts_n; ++pos)
-		if (NB_SEGS(pkts[pos]) > 1)
-			break;
-	return pos;
+	for (i = 0; i < pkts_n; ++i) {
+		struct rte_mbuf *pkt = pkts[i];
+
+		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+			err_bytes += PKT_LEN(pkt);
+#endif
+			rte_pktmbuf_free_seg(pkt);
+		} else {
+			pkts[n++] = pkt;
+		}
+	}
+	rxq->stats.idropped += (pkts_n - n);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+	/* Correct counters of errored completions. */
+	rxq->stats.ipackets -= (pkts_n - n);
+	rxq->stats.ibytes -= err_bytes;
+#endif
+	mlx5_rx_err_handle(rxq, 1);
+	return n;
 }
 
 /**
- * Count the number of packets having same ol_flags and calculate cs_flags.
+ * Replenish RX buffers in bulk once rq_repl_thresh free slots are available.
  *
- * @param txq
- *   Pointer to TX queue structure.
- * @param pkts
- *   Pointer to array of packets.
- * @param pkts_n
- *   Number of packets.
- * @param cs_flags
- *   Pointer of flags to be returned.
- *
- * @return
- *   Number of packets having same ol_flags.
+ * @param rxq
+ *   Pointer to RX queue structure.
  */
-static inline unsigned int
-txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
-		 uint16_t pkts_n, uint8_t *cs_flags)
+static inline void
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
 {
-	unsigned int pos;
-	const uint64_t ol_mask =
-		PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
-		PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
-		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
-
-	if (!pkts_n)
-		return 0;
-	/* Count the number of packets having same ol_flags. */
-	for (pos = 1; pos < pkts_n; ++pos)
-		if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
-			break;
-	/* Should open another MPW session for the rest. */
-	if (pkts[0]->ol_flags &
-	    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-		const uint64_t is_tunneled =
-			pkts[0]->ol_flags &
-			(PKT_TX_TUNNEL_GRE |
-			 PKT_TX_TUNNEL_VXLAN);
-
-		if (is_tunneled && txq->tunnel_en) {
-			*cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
-				    MLX5_ETH_WQE_L4_INNER_CSUM;
-			if (pkts[0]->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-				*cs_flags |= MLX5_ETH_WQE_L3_CSUM;
-		} else {
-			*cs_flags = MLX5_ETH_WQE_L3_CSUM |
-				    MLX5_ETH_WQE_L4_CSUM;
+	const uint16_t q_n = 1 << rxq->elts_n;
+	const uint16_t q_mask = q_n - 1;
+	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
+	uint16_t elts_idx = rxq->rq_ci & q_mask;
+	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+	volatile struct mlx5_wqe_data_seg *wq =
+		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
+	unsigned int i;
+
+	if (n >= rxq->rq_repl_thresh) {
+		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
+		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
+			    MLX5_VPMD_DESCS_PER_LOOP);
+		/* Leave DESCS_PER_LOOP slots free; not to cross queue end. */
+		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+			rxq->stats.rx_nombuf += n;
+			return;
+		}
+		for (i = 0; i < n; ++i) {
+			void *buf_addr;
+
+			/*
+			 * In order to support the mbufs with external attached
+			 * data buffer we should use the buf_addr pointer
+			 * instead of rte_mbuf_buf_addr(). It touches the mbuf
+			 * itself and may impact the performance.
+			 */
+			buf_addr = elts[i]->buf_addr;
+			wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+						      RTE_PKTMBUF_HEADROOM);
+			/* If there's a single MR, no need to replace LKey. */
+			if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
+				     > 1))
+				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
 		}
+		rxq->rq_ci += n;
+		/* Slots past rq_ci use fake_mbuf, not consumed mbufs. */
+		elts_idx = rxq->rq_ci & q_mask;
+		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+		rte_io_wmb();
+		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 	}
-	return pos;
 }
 
 /**
- * DPDK callback for vectorized TX.
- *
- * @param dpdk_txq
- *   Generic pointer to TX queue structure.
- * @param[in] pkts
- *   Packets to transmit.
- * @param pkts_n
- *   Number of packets in array.
+ * Replenish buffers for MPRQ RX in bulk.
  *
- * @return
- *   Number of packets successfully transmitted (<= pkts_n).
+ * @param rxq
+ *   Pointer to RX queue structure.
  */
-uint16_t
-mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
-		      uint16_t pkts_n)
+static inline void
+mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
 {
-	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
-	uint16_t nb_tx = 0;
+	const uint16_t wqe_n = 1 << rxq->elts_n;
+	const uint32_t strd_n = 1 << rxq->strd_num_n;
+	const uint32_t elts_n = wqe_n * strd_n;
+	const uint32_t wqe_mask = elts_n - 1;
+	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
+	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
+	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
 
-	while (pkts_n > nb_tx) {
-		uint16_t n;
-		uint16_t ret;
-
-		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
-		ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
-		nb_tx += ret;
-		if (!ret)
-			break;
+	if (n >= rxq->rq_repl_thresh) {
+		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
+		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
+			     MLX5_VPMD_DESCS_PER_LOOP);
+		/* Not to cross queue end. */
+		n = RTE_MIN(n, elts_n - elts_idx);
+		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+			rxq->stats.rx_nombuf += n;
+			return;
+		}
+		rxq->elts_ci += n;
 	}
-	return nb_tx;
 }
 
 /**
- * DPDK callback for vectorized TX with multi-seg packets and offload.
+ * Copy or attach MPRQ buffers to RX SW ring.
  *
- * @param dpdk_txq
- *   Generic pointer to TX queue structure.
- * @param[in] pkts
- *   Packets to transmit.
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param pkts
+ *   Pointer to array of packets to be stored.
  * @param pkts_n
- *   Number of packets in array.
+ *   Number of packets to be stored.
  *
  * @return
- *   Number of packets successfully transmitted (<= pkts_n).
+ *   Number of packets successfully copied/attached (<= pkts_n).
  */
-uint16_t
-mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+static inline uint16_t
+rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
+		     struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
-	uint16_t nb_tx = 0;
-
-	while (pkts_n > nb_tx) {
-		uint8_t cs_flags = 0;
-		uint16_t n;
-		uint16_t ret;
-
-		/* Transmit multi-seg packets in the head of pkts list. */
-		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) &&
-		    NB_SEGS(pkts[nb_tx]) > 1)
-			nb_tx += txq_scatter_v(txq,
-					       &pkts[nb_tx],
-					       pkts_n - nb_tx);
-		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
-		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
-			n = txq_check_multiseg(&pkts[nb_tx], n);
-		if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
-			n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
-		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
-		nb_tx += ret;
-		if (!ret)
-			break;
+	const uint16_t wqe_n = 1 << rxq->elts_n;
+	const uint16_t wqe_mask = wqe_n - 1;
+	const uint16_t strd_sz = 1 << rxq->strd_sz_n;
+	const uint32_t strd_n = 1 << rxq->strd_num_n;
+	const uint32_t elts_n = wqe_n * strd_n;
+	const uint32_t elts_mask = elts_n - 1;
+	uint32_t elts_idx = rxq->rq_pi & elts_mask;
+	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+	uint32_t rq_ci = rxq->rq_ci;
+	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+	uint16_t copied = 0;
+	uint16_t i = 0;
+
+	for (i = 0; i < pkts_n; ++i) {
+		uint16_t strd_cnt;
+		enum mlx5_rqx_code rxq_code;
+
+		if (rxq->consumed_strd == strd_n) {
+			/* Replace WQE if the buffer is still in use. */
+			mprq_buf_replace(rxq, rq_ci & wqe_mask);
+			/* Advance to the next WQE. */
+			rxq->consumed_strd = 0;
+			rq_ci++;
+			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+		}
+		if (!elts[i]->pkt_len) {
+			rxq->consumed_strd = strd_n;
+			rte_pktmbuf_free_seg(elts[i]);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+			rxq->stats.ipackets -= 1;
+#endif
+			continue;
+		}
+		strd_cnt = (elts[i]->pkt_len / strd_sz) +
+			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
+		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
+					   buf, rxq->consumed_strd, strd_cnt);
+		rxq->consumed_strd += strd_cnt;
+		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+			rxq->stats.ipackets -= 1;
+			rxq->stats.ibytes -= elts[i]->pkt_len;
+#endif
+			/* Free only after pkt_len is read for the stats. */
+			rte_pktmbuf_free_seg(elts[i]);
+			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
+				++rxq->stats.rx_nombuf;
+				break;
+			}
+			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
+				++rxq->stats.idropped;
+				continue;
+			}
+		}
+		pkts[copied++] = elts[i];
+	}
+	rxq->rq_pi += i;
+	rxq->cq_ci += i;
+	rte_io_wmb();
+	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	if (rq_ci != rxq->rq_ci) {
+		rxq->rq_ci = rq_ci;
+		rte_io_wmb();
+		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 	}
-	return nb_tx;
+	return copied;
 }
 
 /**
- * Skip error packets.
+ * Receive burst of packets. An errored completion also consumes a mbuf, but the
+ * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
+ * before returning to application.
  *
  * @param rxq
  *   Pointer to RX queue structure.
@@ -227,40 +259,97 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Array to store received packets.
  * @param pkts_n
  *   Maximum number of packets in array.
+ * @param[out] err
+ *   Pointer to a flag. Set non-zero value if pkts array has at least one error
+ *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
- *   Number of packets successfully received (<= pkts_n).
+ *   Number of packets received including errors (<= pkts_n).
  */
-static uint16_t
-rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
-			 uint16_t pkts_n)
+static inline uint16_t
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
 {
-	uint16_t n = 0;
-	unsigned int i;
-#ifdef MLX5_PMD_SOFT_COUNTERS
-	uint32_t err_bytes = 0;
-#endif
+	const uint16_t q_n = 1 << rxq->cqe_n;
+	const uint16_t q_mask = q_n - 1;
+	const uint16_t e_n = 1 << rxq->elts_n;
+	const uint16_t e_mask = e_n - 1;
+	volatile struct mlx5_cqe *cq;
+	struct rte_mbuf **elts;
+	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+	uint16_t nocmp_n = 0;
+	uint16_t rcvd_pkt = 0;
+	unsigned int cq_idx = rxq->cq_ci & q_mask;
+	unsigned int elts_idx;
 
-	for (i = 0; i < pkts_n; ++i) {
-		struct rte_mbuf *pkt = pkts[i];
+	MLX5_ASSERT(rxq->sges_n == 0);
+	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
+	cq = &(*rxq->cqes)[cq_idx];
+	rte_prefetch0(cq);
+	rte_prefetch0(cq + 1);
+	rte_prefetch0(cq + 2);
+	rte_prefetch0(cq + 3);
+	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+	mlx5_rx_replenish_bulk_mbuf(rxq);
+	/* See if there're unreturned mbufs from compressed CQE. */
+	rcvd_pkt = rxq->decompressed;
+	if (rcvd_pkt > 0) {
+		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
+				pkts, rcvd_pkt);
+		rxq->rq_pi += rcvd_pkt;
+		rxq->decompressed -= rcvd_pkt;
+		pkts += rcvd_pkt;
+	}
+	elts_idx = rxq->rq_pi & e_mask;
+	elts = &(*rxq->elts)[elts_idx];
+	/* Round down to MLX5_VPMD_DESCS_PER_LOOP not to overflow pkts array. */
+	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+	/* Not to cross queue end. */
+	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
+	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
+	if (!pkts_n) {
+		*no_cq = !rcvd_pkt;
+		return rcvd_pkt;
+	}
+	/* At this point, there shouldn't be any remaining packets. */
+	MLX5_ASSERT(rxq->decompressed == 0);
+	/* Process all the CQEs; comp_idx is set if a compressed CQE is met. */
+	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
+	/* If no new CQE seen, return without updating cq_db. */
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
+		return rcvd_pkt;
+	}
+	/* Update the consumer indexes for non-compressed CQEs. */
+	MLX5_ASSERT(nocmp_n <= pkts_n);
+	rxq->cq_ci += nocmp_n;
+	rxq->rq_pi += nocmp_n;
+	rcvd_pkt += nocmp_n;
+	/* Decompress the last CQE if compressed. */
+	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
+		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
+							&elts[nocmp_n]);
+		rxq->cq_ci += rxq->decompressed;
+		/* Return more packets if needed. */
+		if (nocmp_n < pkts_n) {
+			uint16_t n = rxq->decompressed;
 
-		if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
-#ifdef MLX5_PMD_SOFT_COUNTERS
-			err_bytes += PKT_LEN(pkt);
-#endif
-			rte_pktmbuf_free_seg(pkt);
-		} else {
-			pkts[n++] = pkt;
+			n = RTE_MIN(n, pkts_n - nocmp_n);
+			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
+					&pkts[nocmp_n], n);
+			rxq->rq_pi += n;
+			rcvd_pkt += n;
+			rxq->decompressed -= n;
 		}
 	}
-	rxq->stats.idropped += (pkts_n - n);
-#ifdef MLX5_PMD_SOFT_COUNTERS
-	/* Correct counters of errored completions. */
-	rxq->stats.ipackets -= (pkts_n - n);
-	rxq->stats.ibytes -= err_bytes;
-#endif
-	rxq->pending_err = 0;
-	return n;
+	rte_io_wmb();
+	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	*no_cq = !rcvd_pkt;
+	return rcvd_pkt;
 }
 
 /**
@@ -280,59 +369,155 @@ uint16_t
 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	struct mlx5_rxq_data *rxq = dpdk_rxq;
-	uint16_t nb_rx;
+	uint16_t nb_rx = 0;
+	uint16_t tn = 0;
+	uint64_t err = 0;
+	bool no_cq = false;
 
-	nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
-	if (unlikely(rxq->pending_err))
-		nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
-	return nb_rx;
+	do {
+		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
+				    &err, &no_cq);
+		if (unlikely(err | rxq->err_state))
+			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
+		tn += nb_rx;
+		if (unlikely(no_cq))
+			break;
+	} while (tn != pkts_n);
+	return tn;
 }
 
 /**
- * Check Tx queue flags are set for raw vectorized Tx.
+ * Receive burst of packets from MPRQ. An errored completion also consumes a
+ * mbuf, but the packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs
+ * should be freed before returning to application.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
+ * @param[out] err
+ *   Pointer to a flag. Set non-zero value if pkts array has at least one error
+ *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
- *   1 if supported, negative errno value if not.
+ *   Number of packets received including errors (<= pkts_n).
  */
-int __attribute__((cold))
-priv_check_raw_vec_tx_support(struct priv *priv)
+static inline uint16_t
+rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
 {
-	uint16_t i;
+	const uint16_t q_n = 1 << rxq->cqe_n;
+	const uint16_t q_mask = q_n - 1;
+	const uint16_t wqe_n = 1 << rxq->elts_n;
+	const uint32_t strd_n = 1 << rxq->strd_num_n;
+	const uint32_t elts_n = wqe_n * strd_n;
+	const uint32_t elts_mask = elts_n - 1;
+	volatile struct mlx5_cqe *cq;
+	struct rte_mbuf **elts;
+	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+	uint16_t nocmp_n = 0;
+	uint16_t rcvd_pkt = 0;
+	uint16_t cp_pkt = 0;
+	unsigned int cq_idx = rxq->cq_ci & q_mask;
+	unsigned int elts_idx;
 
-	/* All the configured queues should support. */
-	for (i = 0; i < priv->txqs_n; ++i) {
-		struct mlx5_txq_data *txq = (*priv->txqs)[i];
+	MLX5_ASSERT(rxq->sges_n == 0);
+	cq = &(*rxq->cqes)[cq_idx];
+	rte_prefetch0(cq);
+	rte_prefetch0(cq + 1);
+	rte_prefetch0(cq + 2);
+	rte_prefetch0(cq + 3);
+	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
+	/* See if there're unreturned mbufs from compressed CQE. */
+	rcvd_pkt = rxq->decompressed;
+	if (rcvd_pkt > 0) {
+		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
+		rxq->decompressed -= rcvd_pkt;
+		pkts += cp_pkt;
+	}
+	elts_idx = rxq->rq_pi & elts_mask;
+	elts = &(*rxq->elts)[elts_idx];
+	/* Not to overflow pkts array. */
+	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+	/* Not to cross queue end. */
+	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
+	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
+	/* Not to move past the allocated mbufs. */
+	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
+	if (!pkts_n) {
+		*no_cq = !cp_pkt;
+		return cp_pkt;
+	}
+	/* At this point, there shouldn't be any remaining packets. */
+	MLX5_ASSERT(rxq->decompressed == 0);
+	/* Process all the CQEs */
+	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
+	/* If no new CQE seen, return without updating cq_db. */
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
+		return cp_pkt;
+	}
+	/* Copy non-compressed packets out; the callee advances rq_pi and cq_ci. */
+	MLX5_ASSERT(nocmp_n <= pkts_n);
+	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
+	rcvd_pkt += cp_pkt;
+	/* Decompress the last CQE if compressed. */
+	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
+		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
+							&elts[nocmp_n]);
+		/* Return more packets if needed. */
+		if (nocmp_n < pkts_n) {
+			uint16_t n = rxq->decompressed;
 
-		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
-		    !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
-			break;
+			n = RTE_MIN(n, pkts_n - nocmp_n);
+			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
+			rcvd_pkt += cp_pkt;
+			rxq->decompressed -= n;
+		}
 	}
-	if (i != priv->txqs_n)
-		return -ENOTSUP;
-	return 1;
+	*no_cq = !rcvd_pkt;
+	return rcvd_pkt;
 }
 
 /**
- * Check a device can support vectorized TX.
+ * DPDK callback for vectorized MPRQ RX, polling until pkts is full or no CQE.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dpdk_rxq
+ *   Generic pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
  *
  * @return
- *   1 if supported, negative errno value if not.
+ *   Number of packets successfully received (<= pkts_n).
  */
-int __attribute__((cold))
-priv_check_vec_tx_support(struct priv *priv)
+uint16_t
+mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	if (!priv->tx_vec_en ||
-	    priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
-	    priv->mps != MLX5_MPW_ENHANCED ||
-	    priv->tso)
-		return -ENOTSUP;
-	return 1;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
+	uint16_t nb_rx = 0;
+	uint16_t tn = 0;
+	uint64_t err = 0;
+	bool no_cq = false;
+
+	do {
+		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
+					 &err, &no_cq);
+		if (unlikely(err | rxq->err_state))
+			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
+		tn += nb_rx;
+		if (unlikely(no_cq))
+			break;
+	} while (tn != pkts_n);
+	return tn;
 }
 
 /**
@@ -344,13 +529,15 @@ priv_check_vec_tx_support(struct priv *priv)
  * @return
  *   1 if supported, negative errno value if not.
  */
-int __attribute__((cold))
-rxq_check_vec_support(struct mlx5_rxq_data *rxq)
+int __rte_cold
+mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
 	struct mlx5_rxq_ctrl *ctrl =
 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
-	if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
+	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
+		return -ENOTSUP;
+	if (rxq->lro)
 		return -ENOTSUP;
 	return 1;
 }
@@ -358,18 +545,21 @@ rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 /**
  * Check a device can support vectorized RX.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
  *   1 if supported, negative errno value if not.
  */
-int __attribute__((cold))
-priv_check_vec_rx_support(struct priv *priv)
+int __rte_cold
+mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
 {
-	uint16_t i;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint32_t i;
 
-	if (!priv->rx_vec_en)
+	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
+		return -ENOTSUP;
+	if (!priv->config.rx_vec_en)
 		return -ENOTSUP;
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->rxqs_n; ++i) {
@@ -377,7 +567,7 @@ priv_check_vec_rx_support(struct priv *priv)
 
 		if (!rxq)
 			continue;
-		if (rxq_check_vec_support(rxq) < 0)
+		if (mlx5_rxq_check_vec_support(rxq) < 0)
 			break;
 	}
 	if (i != priv->rxqs_n)