net/mlx5: extend Rx completion with error handling
[dpdk.git] drivers/net/mlx5/mlx5_rxtx.c
index 5278594..8103612 100644
@@ -25,6 +25,7 @@
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
 #include <rte_ether.h>
+#include <rte_cycles.h>
 
 #include "mlx5.h"
 #include "mlx5_utils.h"
@@ -38,7 +39,7 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
 
 static __rte_always_inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-                uint16_t cqe_cnt, uint32_t *rss_hash);
+                uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
 
 static __rte_always_inline uint32_t
 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -417,20 +418,17 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 }
 
 /**
- * DPDK callback to check the status of a rx descriptor.
+ * Internal function to compute the number of used descriptors in an Rx queue.
  *
- * @param rx_queue
- *   The rx queue.
- * @param[in] offset
- *   The index of the descriptor in the ring.
+ * @param rxq
+ *   The Rx queue.
  *
  * @return
- *   The status of the tx descriptor.
+ *   The number of used Rx descriptors.
  */
-int
-mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
+static uint32_t
+rx_queue_count(struct mlx5_rxq_data *rxq)
 {
-       struct mlx5_rxq_data *rxq = rx_queue;
        struct rxq_zip *zip = &rxq->zip;
        volatile struct mlx5_cqe *cqe;
        const unsigned int cqe_n = (1 << rxq->cqe_n);
@@ -447,7 +445,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
                cq_ci = rxq->cq_ci;
        }
        cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
-       while (check_cqe(cqe, cqe_n, cq_ci) == 0) {
+       while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
                int8_t op_own;
                unsigned int n;
 
@@ -461,11 +459,116 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
                cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
        }
        used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
-       if (offset < used)
+       return used;
+}
+
+/**
+ * DPDK callback to check the status of an Rx descriptor.
+ *
+ * @param rx_queue
+ *   The Rx queue.
+ * @param[in] offset
+ *   The index of the descriptor in the ring.
+ *
+ * @return
+ *   The status of the Rx descriptor.
+ */
+int
+mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
+{
+       struct mlx5_rxq_data *rxq = rx_queue;
+       struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+       struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
+
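+       /* Supported only by the non-vectorized Rx burst routine. */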
+       if (dev->rx_pkt_burst != mlx5_rx_burst) {
+               rte_errno = ENOTSUP;
+               return -rte_errno;
+       }
+       if (offset >= (1 << rxq->elts_n)) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+       if (offset < rx_queue_count(rxq))
                return RTE_ETH_RX_DESC_DONE;
        return RTE_ETH_RX_DESC_AVAIL;
 }
 
+/**
+ * DPDK callback to get the number of used descriptors in an Rx queue.
+ *
+ * @param dev
+ *   Pointer to the device structure.
+ * @param rx_queue_id
+ *   The Rx queue index.
+ *
+ * @return
+ *   The number of used Rx descriptors, -EINVAL if the queue is invalid,
+ *   -ENOTSUP if the Rx burst function does not support this query.
+ */
+uint32_t
+mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_rxq_data *rxq;
+
+       if (dev->rx_pkt_burst != mlx5_rx_burst) {
+               rte_errno = ENOTSUP;
+               return -rte_errno;
+       }
+       rxq = (*priv->rxqs)[rx_queue_id];
+       if (!rxq) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+       return rx_queue_count(rxq);
+}
+
+#define MLX5_SYSTEM_LOG_DIR "/var/log"
+/**
+ * Dump debug information to a log file.
+ *
+ * @param fname
+ *   The file name.
+ * @param hex_title
+ *   If not NULL, this string is printed as a header and the buffer is
+ *   dumped in hexadecimal view; otherwise the buffer is printed as a
+ *   plain string.
+ * @param buf
+ *   Pointer to the buffer to dump.
+ * @param hex_len
+ *   The number of bytes to dump.
+ */
+void
+mlx5_dump_debug_information(const char *fname, const char *hex_title,
+                           const void *buf, unsigned int hex_len)
+{
+       FILE *fd;
+
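+       /* Try /var/log first and fall back to the current directory. */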
+       MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
+       fd = fopen(path, "a+");
+       if (!fd) {
+               DRV_LOG(WARNING, "cannot open %s for debug dump\n",
+                       path);
+               MKSTR(path2, "./%s", fname);
+               fd = fopen(path2, "a+");
+               if (!fd) {
+                       DRV_LOG(ERR, "cannot open %s for debug dump\n",
+                               path2);
+                       return;
+               }
+               DRV_LOG(INFO, "New debug dump in file %s\n", path2);
+       } else {
+               DRV_LOG(INFO, "New debug dump in file %s\n", path);
+       }
+       if (hex_title)
+               rte_hexdump(fd, hex_title, buf, hex_len);
+       else
+               fprintf(fd, "%s", (const char *)buf);
+       fprintf(fd, "\n\n\n");
+       fclose(fd);
+}
+
 /**
  * DPDK callback for TX.
  *
@@ -495,6 +598,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
        unsigned int segs_n = 0;
        const unsigned int max_inline = txq->max_inline;
+       uint64_t addr_64;
 
        if (unlikely(!pkts_n))
                return 0;
@@ -503,8 +607,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        /* Start processing. */
        mlx5_tx_complete(txq);
        max_elts = (elts_n - (elts_head - txq->elts_tail));
-       /* A CQE slot must always be available. */
-       assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
        max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
        if (unlikely(!max_wqe))
                return 0;
@@ -524,6 +626,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
                uint32_t swp_offsets = 0;
                uint8_t swp_types = 0;
+               rte_be32_t metadata;
                uint16_t tso_segsz = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                uint32_t total_length = 0;
@@ -567,11 +670,14 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                cs_flags = txq_ol_cksum_to_cs(buf);
                txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
                raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+               /* Copy metadata from mbuf if valid */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Replace the Ethernet type by the VLAN if necessary. */
                if (buf->ol_flags & PKT_TX_VLAN_PKT) {
                        uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
                                                         buf->vlan_tci);
-                       unsigned int len = 2 * ETHER_ADDR_LEN - 2;
+                       unsigned int len = 2 * RTE_ETHER_ADDR_LEN - 2;
 
                        addr += 2;
                        length -= 2;
@@ -632,7 +738,8 @@ pkt_inline:
                                                   RTE_CACHE_LINE_SIZE);
                        copy_b = (addr_end > addr) ?
                                 RTE_MIN((addr_end - addr), length) : 0;
-                       if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
+                       if (copy_b && ((end - (uintptr_t)raw) >
+                                      (copy_b + sizeof(inl)))) {
                                /*
                                 * One Dseg remains in the current WQE.  To
                                 * keep the computation positive, it is
@@ -711,12 +818,12 @@ pkt_inline:
                        ds = 3;
 use_dseg:
                        /* Add the remaining packet as a simple ds. */
-                       addr = rte_cpu_to_be_64(addr);
+                       addr_64 = rte_cpu_to_be_64(addr);
                        *dseg = (rte_v128u32_t){
                                rte_cpu_to_be_32(length),
                                mlx5_tx_mb2mr(txq, buf),
-                               addr,
-                               addr >> 32,
+                               addr_64,
+                               addr_64 >> 32,
                        };
                        ++ds;
                        if (!segs_n)
@@ -750,12 +857,12 @@ next_seg:
                total_length += length;
 #endif
                /* Store segment information. */
-               addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+               addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
                *dseg = (rte_v128u32_t){
                        rte_cpu_to_be_32(length),
                        mlx5_tx_mb2mr(txq, buf),
-                       addr,
-                       addr >> 32,
+                       addr_64,
+                       addr_64 >> 32,
                };
                (*txq->elts)[++elts_head & elts_m] = buf;
                if (--segs_n)
@@ -782,7 +889,7 @@ next_pkt:
                                swp_offsets,
                                cs_flags | (swp_types << 8) |
                                (rte_cpu_to_be_16(tso_segsz) << 16),
-                               0,
+                               metadata,
                                (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
                        };
                } else {
@@ -796,7 +903,7 @@ next_pkt:
                        wqe->eseg = (rte_v128u32_t){
                                swp_offsets,
                                cs_flags | (swp_types << 8),
-                               0,
+                               metadata,
                                (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
                        };
                }
@@ -816,14 +923,13 @@ next_wqe:
        /* Check whether completion threshold has been reached. */
        comp = txq->elts_comp + i + j + k;
        if (comp >= MLX5_TX_COMP_THRESH) {
+               /* A CQE slot must always be available. */
+               assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
                /* Request completion on last WQE. */
                last_wqe->ctrl2 = rte_cpu_to_be_32(8);
                /* Save elts_head in unused "immediate" field of WQE. */
                last_wqe->ctrl3 = txq->elts_head;
                txq->elts_comp = 0;
-#ifndef NDEBUG
-               ++txq->cq_pi;
-#endif
        } else {
                txq->elts_comp = comp;
        }
@@ -863,7 +969,7 @@ mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
        mpw->wqe->eseg.inline_hdr_sz = 0;
        mpw->wqe->eseg.rsvd0 = 0;
        mpw->wqe->eseg.rsvd1 = 0;
-       mpw->wqe->eseg.rsvd2 = 0;
+       mpw->wqe->eseg.flow_table_metadata = 0;
        mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
                                             (txq->wqe_ci << 8) |
                                             MLX5_OPCODE_TSO);
@@ -942,8 +1048,6 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        /* Start processing. */
        mlx5_tx_complete(txq);
        max_elts = (elts_n - (elts_head - txq->elts_tail));
-       /* A CQE slot must always be available. */
-       assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
        max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
        if (unlikely(!max_wqe))
                return 0;
@@ -952,6 +1056,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint32_t length;
                unsigned int segs_n = buf->nb_segs;
                uint32_t cs_flags;
+               rte_be32_t metadata;
 
                /*
                 * Make sure there is enough room to store this packet and
@@ -968,6 +1073,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                max_elts -= segs_n;
                --pkts_n;
                cs_flags = txq_ol_cksum_to_cs(buf);
+               /* Copy metadata from mbuf if valid */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Retrieve packet information. */
                length = PKT_LEN(buf);
                assert(length);
@@ -975,6 +1083,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
                    ((mpw.len != length) ||
                     (segs_n != 1) ||
+                    (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                     (mpw.wqe->eseg.cs_flags != cs_flags)))
                        mlx5_mpw_close(txq, &mpw);
                if (mpw.state == MLX5_MPW_STATE_CLOSED) {
@@ -988,6 +1097,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        max_wqe -= 2;
                        mlx5_mpw_new(txq, &mpw, length);
                        mpw.wqe->eseg.cs_flags = cs_flags;
+                       mpw.wqe->eseg.flow_table_metadata = metadata;
                }
                /* Multi-segment packets must be alone in their MPW. */
                assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -1032,14 +1142,13 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        if (comp >= MLX5_TX_COMP_THRESH) {
                volatile struct mlx5_wqe *wqe = mpw.wqe;
 
+               /* A CQE slot must always be available. */
+               assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
                /* Request completion on last WQE. */
                wqe->ctrl[2] = rte_cpu_to_be_32(8);
                /* Save elts_head in unused "immediate" field of WQE. */
                wqe->ctrl[3] = elts_head;
                txq->elts_comp = 0;
-#ifndef NDEBUG
-               ++txq->cq_pi;
-#endif
        } else {
                txq->elts_comp = comp;
        }
@@ -1087,7 +1196,7 @@ mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
        mpw->wqe->eseg.cs_flags = 0;
        mpw->wqe->eseg.rsvd0 = 0;
        mpw->wqe->eseg.rsvd1 = 0;
-       mpw->wqe->eseg.rsvd2 = 0;
+       mpw->wqe->eseg.flow_table_metadata = 0;
        inl = (struct mlx5_wqe_inl_small *)
                (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
        mpw->data.raw = (uint8_t *)&inl->raw;
@@ -1171,14 +1280,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
        /* Start processing. */
        mlx5_tx_complete(txq);
        max_elts = (elts_n - (elts_head - txq->elts_tail));
-       /* A CQE slot must always be available. */
-       assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
        do {
                struct rte_mbuf *buf = *(pkts++);
                uintptr_t addr;
                uint32_t length;
                unsigned int segs_n = buf->nb_segs;
                uint8_t cs_flags;
+               rte_be32_t metadata;
 
                /*
                 * Make sure there is enough room to store this packet and
@@ -1200,18 +1308,23 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                 */
                max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
                cs_flags = txq_ol_cksum_to_cs(buf);
+               /* Copy metadata from mbuf if valid */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Retrieve packet information. */
                length = PKT_LEN(buf);
                /* Start new session if packet differs. */
                if (mpw.state == MLX5_MPW_STATE_OPENED) {
                        if ((mpw.len != length) ||
                            (segs_n != 1) ||
+                           (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                            (mpw.wqe->eseg.cs_flags != cs_flags))
                                mlx5_mpw_close(txq, &mpw);
                } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
                        if ((mpw.len != length) ||
                            (segs_n != 1) ||
                            (length > inline_room) ||
+                           (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                            (mpw.wqe->eseg.cs_flags != cs_flags)) {
                                mlx5_mpw_inline_close(txq, &mpw);
                                inline_room =
@@ -1231,12 +1344,14 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                                max_wqe -= 2;
                                mlx5_mpw_new(txq, &mpw, length);
                                mpw.wqe->eseg.cs_flags = cs_flags;
+                               mpw.wqe->eseg.flow_table_metadata = metadata;
                        } else {
                                if (unlikely(max_wqe < wqe_inl_n))
                                        break;
                                max_wqe -= wqe_inl_n;
                                mlx5_mpw_inline_new(txq, &mpw, length);
                                mpw.wqe->eseg.cs_flags = cs_flags;
+                               mpw.wqe->eseg.flow_table_metadata = metadata;
                        }
                }
                /* Multi-segment packets must be alone in their MPW. */
@@ -1329,14 +1444,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
        if (comp >= MLX5_TX_COMP_THRESH) {
                volatile struct mlx5_wqe *wqe = mpw.wqe;
 
+               /* A CQE slot must always be available. */
+               assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
                /* Request completion on last WQE. */
                wqe->ctrl[2] = rte_cpu_to_be_32(8);
                /* Save elts_head in unused "immediate" field of WQE. */
                wqe->ctrl[3] = elts_head;
                txq->elts_comp = 0;
-#ifndef NDEBUG
-               ++txq->cq_pi;
-#endif
        } else {
                txq->elts_comp = comp;
        }
@@ -1450,6 +1564,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
        unsigned int mpw_room = 0;
        unsigned int inl_pad = 0;
        uint32_t inl_hdr;
+       uint64_t addr_64;
        struct mlx5_mpw mpw = {
                .state = MLX5_MPW_STATE_CLOSED,
        };
@@ -1459,8 +1574,6 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
        /* Start processing. */
        mlx5_tx_complete(txq);
        max_elts = (elts_n - (elts_head - txq->elts_tail));
-       /* A CQE slot must always be available. */
-       assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
        max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
        if (unlikely(!max_wqe))
                return 0;
@@ -1470,6 +1583,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                unsigned int do_inline = 0; /* Whether inline is possible. */
                uint32_t length;
                uint8_t cs_flags;
+               rte_be32_t metadata;
 
                /* Multi-segmented packet is handled in slow-path outside. */
                assert(NB_SEGS(buf) == 1);
@@ -1477,6 +1591,9 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                if (max_elts - j == 0)
                        break;
                cs_flags = txq_ol_cksum_to_cs(buf);
+               /* Copy metadata from mbuf if valid */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Retrieve packet information. */
                length = PKT_LEN(buf);
                /* Start new session if:
@@ -1491,6 +1608,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                            (length <= txq->inline_max_packet_sz &&
                             inl_pad + sizeof(inl_hdr) + length >
                             mpw_room) ||
+                            (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                            (mpw.wqe->eseg.cs_flags != cs_flags))
                                max_wqe -= mlx5_empw_close(txq, &mpw);
                }
@@ -1514,6 +1632,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                                    sizeof(inl_hdr) + length <= mpw_room &&
                                    !txq->mpw_hdr_dseg;
                        mpw.wqe->eseg.cs_flags = cs_flags;
+                       mpw.wqe->eseg.flow_table_metadata = metadata;
                } else {
                        /* Evaluate whether the next packet can be inlined.
                         * Inlininig is possible when:
@@ -1586,13 +1705,13 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                                        ((uintptr_t)mpw.data.raw +
                                         inl_pad);
                        (*txq->elts)[elts_head++ & elts_m] = buf;
-                       addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
-                                                                uintptr_t));
+                       addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+                                                                   uintptr_t));
                        *dseg = (rte_v128u32_t) {
                                rte_cpu_to_be_32(length),
                                mlx5_tx_mb2mr(txq, buf),
-                               addr,
-                               addr >> 32,
+                               addr_64,
+                               addr_64 >> 32,
                        };
                        mpw.data.raw = (volatile void *)(dseg + 1);
                        mpw.total_len += (inl_pad + sizeof(*dseg));
@@ -1616,15 +1735,14 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                         (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
                volatile struct mlx5_wqe *wqe = mpw.wqe;
 
+               /* A CQE slot must always be available. */
+               assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
                /* Request completion on last WQE. */
                wqe->ctrl[2] = rte_cpu_to_be_32(8);
                /* Save elts_head in unused "immediate" field of WQE. */
                wqe->ctrl[3] = elts_head;
                txq->elts_comp = 0;
                txq->mpw_comp = txq->wqe_ci;
-#ifndef NDEBUG
-               ++txq->cq_pi;
-#endif
        } else {
                txq->elts_comp += j;
        }
@@ -1713,6 +1831,183 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
        return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
 }
 
+/**
+ * Initialize Rx WQ and indexes.
+ *
+ * @param[in] rxq
+ *   Pointer to RX queue structure.
+ */
+void
+mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
+{
+       const unsigned int wqe_n = 1 << rxq->elts_n;
+       unsigned int i;
+
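+       /* Fill each WQE with its buffer address, length and lkey. */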
+       for (i = 0; (i != wqe_n); ++i) {
+               volatile struct mlx5_wqe_data_seg *scat;
+               uintptr_t addr;
+               uint32_t byte_count;
+
+               if (mlx5_rxq_mprq_enabled(rxq)) {
+                       struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];
+
+                       scat = &((volatile struct mlx5_wqe_mprq *)
+                               rxq->wqes)[i].dseg;
+                       addr = (uintptr_t)mlx5_mprq_buf_addr(buf);
+                       byte_count = (1 << rxq->strd_sz_n) *
+                                       (1 << rxq->strd_num_n);
+               } else {
+                       struct rte_mbuf *buf = (*rxq->elts)[i];
+
+                       scat = &((volatile struct mlx5_wqe_data_seg *)
+                                       rxq->wqes)[i];
+                       addr = rte_pktmbuf_mtod(buf, uintptr_t);
+                       byte_count = DATA_LEN(buf);
+               }
+               /* scat->addr must be able to store a pointer. */
+               assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+               *scat = (struct mlx5_wqe_data_seg){
+                       .addr = rte_cpu_to_be_64(addr),
+                       .byte_count = rte_cpu_to_be_32(byte_count),
+                       .lkey = mlx5_rx_addr2mr(rxq, addr),
+               };
+       }
+       rxq->consumed_strd = 0;
+       rxq->decompressed = 0;
+       rxq->rq_pi = 0;
+       rxq->zip = (struct rxq_zip){
+               .ai = 0,
+       };
+       /* Update doorbell counter. */
+       rxq->rq_ci = wqe_n >> rxq->sges_n;
+       rte_cio_wmb();
+       *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+}
+
+/**
+ * Handle an Rx error.
+ * The function moves the RQ state to reset when the first error CQE is
+ * received; the caller's loop then drains the CQ. Once the CQ is empty,
+ * the RQ state is moved back to ready and the RQ is reinitialized.
+ * Identifying the next CQE and counting errors remain the caller's
+ * responsibility.
+ *
+ * @param[in] rxq
+ *   Pointer to RX queue structure.
+ * @param[in] mbuf_prepare
+ *   Whether to prepare mbufs for the RQ.
+ *
+ * @return
+ *   -1 in case of recovery error, otherwise the CQE status.
+ */
+int
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
+{
+       const uint16_t cqe_n = 1 << rxq->cqe_n;
+       const uint16_t cqe_mask = cqe_n - 1;
+       const unsigned int wqe_n = 1 << rxq->elts_n;
+       struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+       struct ibv_wq_attr mod = {
+               .attr_mask = IBV_WQ_ATTR_STATE,
+       };
+       union {
+               volatile struct mlx5_cqe *cqe;
+               volatile struct mlx5_err_cqe *err_cqe;
+       } u = {
+               .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
+       };
+       int ret;
+
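+       /*
+        * Recovery: reset the WQ, let the caller drain the CQ, then move
+        * the WQ back to ready and reinitialize the RQ.
+        */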
+       switch (rxq->err_state) {
+       case MLX5_RXQ_ERR_STATE_NO_ERROR:
+               rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
+               /* Fall-through */
+       case MLX5_RXQ_ERR_STATE_NEED_RESET:
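+               /* The recovery is performed by the primary process only. */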
+               if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+                       return -1;
+               mod.wq_state = IBV_WQS_RESET;
+               ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
+               if (ret) {
+                       DRV_LOG(ERR, "Cannot change Rx WQ state to RESET %s\n",
+                               strerror(errno));
+                       return -1;
+               }
+               if (rxq_ctrl->dump_file_n <
+                   rxq_ctrl->priv->config.max_dump_files_num) {
+                       MKSTR(err_str, "Unexpected CQE error syndrome "
+                             "0x%02x CQN = %u RQN = %u wqe_counter = %u"
+                             " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
+                             rxq->cqn, rxq_ctrl->ibv->wq->wq_num,
+                             rte_be_to_cpu_16(u.err_cqe->wqe_counter),
+                             rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
+                       MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
+                             rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
+                       mlx5_dump_debug_information(name, NULL, err_str, 0);
+                       mlx5_dump_debug_information(name, "MLX5 Error CQ:",
+                                                   (const void *)((uintptr_t)
+                                                                   rxq->cqes),
+                                                   sizeof(*u.cqe) * cqe_n);
+                       mlx5_dump_debug_information(name, "MLX5 Error RQ:",
+                                                   (const void *)((uintptr_t)
+                                                                   rxq->wqes),
+                                                   16 * wqe_n);
+                       rxq_ctrl->dump_file_n++;
+               }
+               rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
+               /* Fall-through */
+       case MLX5_RXQ_ERR_STATE_NEED_READY:
+               ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
+               if (ret == MLX5_CQE_STATUS_HW_OWN) {
+                       rte_cio_wmb();
+                       *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+                       rte_cio_wmb();
+                       /*
+                        * The RQ consumer index must be zeroed while moving
+                        * from RESET state to RDY state.
+                        */
+                       *rxq->rq_db = rte_cpu_to_be_32(0);
+                       rte_cio_wmb();
+                       mod.wq_state = IBV_WQS_RDY;
+                       ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
+                       if (ret) {
+                               DRV_LOG(ERR, "Cannot change Rx WQ state to RDY"
+                                       " %s\n", strerror(errno));
+                               return -1;
+                       }
+                       if (mbuf_prepare) {
+                               const uint16_t q_mask = wqe_n - 1;
+                               uint16_t elt_idx;
+                               struct rte_mbuf **elt;
+                               int i;
+                               unsigned int n = wqe_n - (rxq->rq_ci -
+                                                         rxq->rq_pi);
+
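+                               /*
+                                * Replenish the Rx ring with fresh mbufs; on
+                                * allocation failure, free the ones just
+                                * allocated.
+                                */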
+                               for (i = 0; i < (int)n; ++i) {
+                                       elt_idx = (rxq->rq_ci + i) & q_mask;
+                                       elt = &(*rxq->elts)[elt_idx];
+                                       *elt = rte_mbuf_raw_alloc(rxq->mp);
+                                       if (!*elt) {
+                                               for (i--; i >= 0; --i) {
+                                                       elt_idx = (rxq->rq_ci +
+                                                                  i) & q_mask;
+                                                       elt = &(*rxq->elts)
+                                                               [elt_idx];
+                                                       rte_pktmbuf_free_seg
+                                                               (*elt);
+                                               }
+                                               return -1;
+                                       }
+                               }
+                       }
+                       mlx5_rxq_initialize(rxq);
+                       rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+               }
+               return ret;
+       default:
+               return -1;
+       }
+}
+
 /**
  * Get size of the next packet for a given CQE. For compressed CQEs, the
  * consumer index is updated only once all packets of the current one have
@@ -1722,112 +2017,131 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
  *   Pointer to RX queue.
  * @param cqe
  *   CQE to process.
- * @param[out] rss_hash
- *   Packet RSS Hash result.
+ * @param[out] mcqe
+ *   Store pointer to the mini-CQE if the CQE is compressed; otherwise the
+ *   pointer is not written.
  *
  * @return
- *   Packet size in bytes (0 if there is none), -1 in case of completion
- *   with error.
+ *   0 in case of empty CQE, otherwise the packet size in bytes.
  */
 static inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-                uint16_t cqe_cnt, uint32_t *rss_hash)
+                uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
 {
        struct rxq_zip *zip = &rxq->zip;
        uint16_t cqe_n = cqe_cnt + 1;
-       int len = 0;
+       int len;
        uint16_t idx, end;
 
-       /* Process compressed data in the CQE and mini arrays. */
-       if (zip->ai) {
-               volatile struct mlx5_mini_cqe8 (*mc)[8] =
-                       (volatile struct mlx5_mini_cqe8 (*)[8])
-                       (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);
-
-               len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
-               *rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result);
-               if ((++zip->ai & 7) == 0) {
-                       /* Invalidate consumed CQEs */
-                       idx = zip->ca;
-                       end = zip->na;
-                       while (idx != end) {
-                               (*rxq->cqes)[idx & cqe_cnt].op_own =
-                                       MLX5_CQE_INVALIDATE;
-                               ++idx;
-                       }
-                       /*
-                        * Increment consumer index to skip the number of
-                        * CQEs consumed. Hardware leaves holes in the CQ
-                        * ring for software use.
-                        */
-                       zip->ca = zip->na;
-                       zip->na += 8;
-               }
-               if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
-                       /* Invalidate the rest */
-                       idx = zip->ca;
-                       end = zip->cq_ci;
-
-                       while (idx != end) {
-                               (*rxq->cqes)[idx & cqe_cnt].op_own =
-                                       MLX5_CQE_INVALIDATE;
-                               ++idx;
-                       }
-                       rxq->cq_ci = zip->cq_ci;
-                       zip->ai = 0;
-               }
-       /* No compressed data, get next CQE and verify if it is compressed. */
-       } else {
-               int ret;
-               int8_t op_own;
-
-               ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
-               if (unlikely(ret == 1))
-                       return 0;
-               ++rxq->cq_ci;
-               op_own = cqe->op_own;
-               rte_cio_rmb();
-               if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
+       do {
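+               /*
+                * Loop while the queue is recovering from an error, counting
+                * the erroneous CQEs as dropped, until a valid completion is
+                * found or the CQ is empty.
+                */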
+               len = 0;
+               /* Process compressed data in the CQE and mini arrays. */
+               if (zip->ai) {
                        volatile struct mlx5_mini_cqe8 (*mc)[8] =
                                (volatile struct mlx5_mini_cqe8 (*)[8])
-                               (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
+                               (uintptr_t)(&(*rxq->cqes)[zip->ca &
                                                          cqe_cnt].pkt_info);
 
-                       /* Fix endianness. */
-                       zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
-                       /*
-                        * Current mini array position is the one returned by
-                        * check_cqe64().
-                        *
-                        * If completion comprises several mini arrays, as a
-                        * special case the second one is located 7 CQEs after
-                        * the initial CQE instead of 8 for subsequent ones.
-                        */
-                       zip->ca = rxq->cq_ci;
-                       zip->na = zip->ca + 7;
-                       /* Compute the next non compressed CQE. */
-                       --rxq->cq_ci;
-                       zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
-                       /* Get packet size to return. */
-                       len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
-                       *rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result);
-                       zip->ai = 1;
-                       /* Prefetch all the entries to be invalidated */
-                       idx = zip->ca;
-                       end = zip->cq_ci;
-                       while (idx != end) {
-                               rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]);
-                               ++idx;
+                       len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
+                       *mcqe = &(*mc)[zip->ai & 7];
+                       if ((++zip->ai & 7) == 0) {
+                               /* Invalidate consumed CQEs */
+                               idx = zip->ca;
+                               end = zip->na;
+                               while (idx != end) {
+                                       (*rxq->cqes)[idx & cqe_cnt].op_own =
+                                               MLX5_CQE_INVALIDATE;
+                                       ++idx;
+                               }
+                               /*
+                                * Increment consumer index to skip the number
+                                * of CQEs consumed. Hardware leaves holes in
+                                * the CQ ring for software use.
+                                */
+                               zip->ca = zip->na;
+                               zip->na += 8;
+                       }
+                       if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
+                               /* Invalidate the rest */
+                               idx = zip->ca;
+                               end = zip->cq_ci;
+
+                               while (idx != end) {
+                                       (*rxq->cqes)[idx & cqe_cnt].op_own =
+                                               MLX5_CQE_INVALIDATE;
+                                       ++idx;
+                               }
+                               rxq->cq_ci = zip->cq_ci;
+                               zip->ai = 0;
+                       }
+               /*
+                * No compressed data, get next CQE and verify if it is
+                * compressed.
+                */
+               } else {
+                       int ret;
+                       int8_t op_own;
+
+                       ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
+                       if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
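+                               /* The CQ is empty or holds an error CQE. */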
+                               if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
+                                            rxq->err_state)) {
+                                       ret = mlx5_rx_err_handle(rxq, 0);
+                                       if (ret == MLX5_CQE_STATUS_HW_OWN ||
+                                           ret == -1)
+                                               return 0;
+                               } else {
+                                       return 0;
+                               }
+                       }
+                       ++rxq->cq_ci;
+                       op_own = cqe->op_own;
+                       if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
+                               volatile struct mlx5_mini_cqe8 (*mc)[8] =
+                                       (volatile struct mlx5_mini_cqe8 (*)[8])
+                                       (uintptr_t)(&(*rxq->cqes)
+                                               [rxq->cq_ci &
+                                                cqe_cnt].pkt_info);
+
+                               /* Fix endianness. */
+                               zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
+                               /*
+                                * Current mini array position is the one
+                                * returned by check_cqe64().
+                                *
+                                * If completion comprises several mini arrays,
+                                * as a special case the second one is located
+                                * 7 CQEs after the initial CQE instead of 8
+                                * for subsequent ones.
+                                */
+                               zip->ca = rxq->cq_ci;
+                               zip->na = zip->ca + 7;
+                               /* Compute the next non compressed CQE. */
+                               --rxq->cq_ci;
+                               zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
+                               /* Get packet size to return. */
+                               len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
+                               *mcqe = &(*mc)[0];
+                               zip->ai = 1;
+                               /* Prefetch all to be invalidated */
+                               idx = zip->ca;
+                               end = zip->cq_ci;
+                               while (idx != end) {
+                                       rte_prefetch0(&(*rxq->cqes)[(idx) &
+                                                                   cqe_cnt]);
+                                       ++idx;
+                               }
+                       } else {
+                               len = rte_be_to_cpu_32(cqe->byte_cnt);
                        }
+               }
+               if (unlikely(rxq->err_state)) {
+                       cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+                       ++rxq->stats.idropped;
                } else {
-                       len = rte_be_to_cpu_32(cqe->byte_cnt);
-                       *rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res);
+                       return len;
                }
-               /* Error while receiving packet. */
-               if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
-                       return -1;
-       }
-       return len;
+       } while (1);
 }
 
 /**
@@ -1934,7 +2248,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                volatile struct mlx5_wqe_data_seg *wqe =
                        &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
                struct rte_mbuf *rep = (*rxq->elts)[idx];
-               uint32_t rss_hash_res = 0;
+               volatile struct mlx5_mini_cqe8 *mcqe = NULL;
+               uint32_t rss_hash_res;
 
                if (pkt)
                        NEXT(seg) = rep;
@@ -1964,24 +2279,21 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                }
                if (!pkt) {
                        cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
-                       len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
-                                              &rss_hash_res);
+                       len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
                        if (!len) {
                                rte_mbuf_raw_free(rep);
                                break;
                        }
-                       if (unlikely(len == -1)) {
-                               /* RX error, packet is likely too large. */
-                               rte_mbuf_raw_free(rep);
-                               ++rxq->stats.idropped;
-                               goto skip;
-                       }
                        pkt = seg;
                        assert(len >= (rxq->crc_present << 2));
                        pkt->ol_flags = 0;
+                       /* If compressed, take hash result from mini-CQE. */
+                       rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
+                                                       cqe->rx_hash_res :
+                                                       mcqe->rx_hash_result);
                        rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
                        if (rxq->crc_present)
-                               len -= ETHER_CRC_LEN;
+                               len -= RTE_ETHER_CRC_LEN;
                        PKT_LEN(pkt) = len;
                }
                DATA_LEN(rep) = DATA_LEN(seg);
@@ -2014,7 +2326,6 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                pkt = NULL;
                --pkts_n;
                ++i;
-skip:
                /* Align consumer index to the next stride. */
                rq_ci >>= sges_n;
                ++rq_ci;
@@ -2103,8 +2414,8 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
        volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
        unsigned int i = 0;
-       uint16_t rq_ci = rxq->rq_ci;
-       uint16_t strd_idx = rxq->strd_ci;
+       uint32_t rq_ci = rxq->rq_ci;
+       uint16_t consumed_strd = rxq->consumed_strd;
        struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 
        while (i < pkts_n) {
@@ -2112,12 +2423,14 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                void *addr;
                int ret;
                unsigned int len;
-               uint16_t consumed_strd;
+               uint16_t strd_cnt;
+               uint16_t strd_idx;
                uint32_t offset;
                uint32_t byte_cnt;
+               volatile struct mlx5_mini_cqe8 *mcqe = NULL;
                uint32_t rss_hash_res = 0;
 
-               if (strd_idx == strd_n) {
+               if (consumed_strd == strd_n) {
                        /* Replace WQE only if the buffer is still in use. */
                        if (rte_atomic16_read(&buf->refcnt) > 1) {
                                mprq_buf_replace(rxq, rq_ci & wq_mask);
@@ -2137,28 +2450,30 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                        rxq->mprq_repl = rep;
                        }
                        /* Advance to the next WQE. */
-                       strd_idx = 0;
+                       consumed_strd = 0;
                        ++rq_ci;
                        buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
                }
                cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
-               ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &rss_hash_res);
+               ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
                if (!ret)
                        break;
-               if (unlikely(ret == -1)) {
-                       /* RX error, packet is likely too large. */
-                       ++rxq->stats.idropped;
-                       continue;
-               }
                byte_cnt = ret;
-               consumed_strd = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
-                               MLX5_MPRQ_STRIDE_NUM_SHIFT;
-               assert(consumed_strd);
-               /* Calculate offset before adding up stride index. */
-               offset = strd_idx * strd_sz + strd_shift;
-               strd_idx += consumed_strd;
+               strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+                          MLX5_MPRQ_STRIDE_NUM_SHIFT;
+               assert(strd_cnt);
+               consumed_strd += strd_cnt;
                if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
                        continue;
+               if (mcqe == NULL) {
+                       rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
+                       strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
+               } else {
+                       /* mini-CQE for MPRQ doesn't have hash result. */
+                       strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
+               }
+               assert(strd_idx < strd_n);
+               assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
                /*
                 * Currently configured to receive a packet per a stride. But if
                 * MTU is adjusted through kernel interface, device could
@@ -2166,7 +2481,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 * case, the packet should be dropped because it is bigger than
                 * the max_rx_pkt_len.
                 */
-               if (unlikely(consumed_strd > 1)) {
+               if (unlikely(strd_cnt > 1)) {
                        ++rxq->stats.idropped;
                        continue;
                }
@@ -2178,7 +2493,8 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
                assert((int)len >= (rxq->crc_present << 2));
                if (rxq->crc_present)
-                       len -= ETHER_CRC_LEN;
+                       len -= RTE_ETHER_CRC_LEN;
+               offset = strd_idx * strd_sz + strd_shift;
                addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
                /* Initialize the offload flag. */
                pkt->ol_flags = 0;
@@ -2201,7 +2517,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                } else {
                        rte_iova_t buf_iova;
                        struct rte_mbuf_ext_shared_info *shinfo;
-                       uint16_t buf_len = consumed_strd * strd_sz;
+                       uint16_t buf_len = strd_cnt * strd_sz;
 
                        /* Increment the refcnt of the whole chunk. */
                        rte_atomic16_add_return(&buf->refcnt, 1);
@@ -2250,7 +2566,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                ++i;
        }
        /* Update the consumer indexes. */
-       rxq->strd_ci = strd_idx;
+       rxq->consumed_strd = consumed_strd;
        rte_cio_wmb();
        *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
        if (rq_ci != rxq->rq_ci) {
@@ -2286,6 +2602,7 @@ removed_tx_burst(void *dpdk_txq __rte_unused,
                 struct rte_mbuf **pkts __rte_unused,
                 uint16_t pkts_n __rte_unused)
 {
+       rte_mb();
        return 0;
 }
 
@@ -2310,6 +2627,7 @@ removed_rx_burst(void *dpdk_txq __rte_unused,
                 struct rte_mbuf **pkts __rte_unused,
                 uint16_t pkts_n __rte_unused)
 {
+       rte_mb();
        return 0;
 }
 
@@ -2320,7 +2638,7 @@ removed_rx_burst(void *dpdk_txq __rte_unused,
  * (e.g.  mlx5_rxtx_vec_sse.c for x86).
  */
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
                      struct rte_mbuf **pkts __rte_unused,
                      uint16_t pkts_n __rte_unused)
@@ -2328,7 +2646,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
                  struct rte_mbuf **pkts __rte_unused,
                  uint16_t pkts_n __rte_unused)
@@ -2336,7 +2654,7 @@ mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
                  struct rte_mbuf **pkts __rte_unused,
                  uint16_t pkts_n __rte_unused)
@@ -2344,25 +2662,25 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
 {
        return -ENOTSUP;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
 {
        return -ENOTSUP;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
 {
        return -ENOTSUP;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
 {
        return -ENOTSUP;