+/**
+ * Replenish buffers for RX in bulk.
+ *
+ * Buffers are allocated only when the number of free descriptors reaches
+ * the replenish threshold. The MLX5_VPMD_DESCS_PER_LOOP entries after the
+ * new producer index are filled with the fake mbuf to prevent the
+ * vectorized code from overrunning into consumed entries.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ */
+static inline void
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
+{
+ const uint16_t q_n = 1 << rxq->elts_n;
+ const uint16_t q_mask = q_n - 1;
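+ /* Number of free descriptors available for replenishment. */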
+ uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
+ uint16_t elts_idx = rxq->rq_ci & q_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ volatile struct mlx5_wqe_data_seg *wq =
+ &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
+ unsigned int i;
+
+ if (n >= rxq->rq_repl_thresh) {
+ MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
+ MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
+ MLX5_VPMD_DESCS_PER_LOOP);
+ /*
+ * Not to cross queue end. Reserve MLX5_VPMD_DESCS_PER_LOOP
+ * slots for the fake mbufs written at the new tail below.
+ */
+ n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ for (i = 0; i < n; ++i) {
+ void *buf_addr;
+
+ /*
+ * In order to support mbufs with externally attached
+ * data buffers we should use the buf_addr pointer
+ * instead of rte_mbuf_buf_addr(). Reading buf_addr
+ * touches the mbuf itself and may impact performance.
+ */
+ buf_addr = elts[i]->buf_addr;
+ wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+ RTE_PKTMBUF_HEADROOM);
+ /* If there's a single MR, no need to replace LKey. */
+ if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
+ > 1))
+ wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+ }
+ rxq->rq_ci += n;
+ /* Prevent overflowing into consumed mbufs. */
+ elts_idx = rxq->rq_ci & q_mask;
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
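+ /* Ensure WQE address writes complete before the doorbell update. */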
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ }
+}
+
+/**
+ * Replenish buffers for MPRQ RX in bulk.
+ *
+ * Only the SW ring of mbufs used as copy/attach targets is refilled here;
+ * the MPRQ WQEs are replaced and the RQ doorbell is updated by
+ * rxq_copy_mprq_mbuf_v() below.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ */
+static inline void
+mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
+{
+ const uint16_t wqe_n = 1 << rxq->elts_n;
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t elts_n = wqe_n * strd_n;
+ const uint32_t wqe_mask = elts_n - 1;
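+ /* Number of free SW ring entries available for replenishment. */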
+ uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
+ uint32_t elts_idx = rxq->elts_ci & wqe_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ unsigned int i;
+
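+ /*
+ * Replenish only when enough entries are free and the amount
+ * still in use has dropped to the threshold or below.
+ */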
+ if (n >= rxq->rq_repl_thresh &&
+ rxq->elts_ci - rxq->rq_pi <= rxq->rq_repl_thresh) {
+ MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
+ MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
+ MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
+ /* Limit replenish number to threshold value. */
+ n = RTE_MIN(n, rxq->rq_repl_thresh);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ rxq->elts_ci += n;
+ /* Prevent overflowing into consumed mbufs. */
+ elts_idx = rxq->elts_ci & wqe_mask;
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+ }
+}
+
+/**
+ * Copy or attach MPRQ buffers to RX SW ring.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be stored.
+ * @param pkts_n
+ * Number of packets to be stored.
+ *
+ * @return
+ * Number of packets successfully copied/attached (<= pkts_n).
+ */
+static inline uint16_t
+rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
+ struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ const uint16_t wqe_n = 1 << rxq->elts_n;
+ const uint16_t wqe_mask = wqe_n - 1;
+ const uint16_t strd_sz = 1 << rxq->strd_sz_n;
+ const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t elts_n = wqe_n * strd_n;
+ const uint32_t elts_mask = elts_n - 1;
+ uint32_t elts_idx = rxq->rq_pi & elts_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ uint32_t rq_ci = rxq->rq_ci;
+ struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+ uint16_t copied = 0;
+ uint16_t i = 0;
+
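+ /*
+ * For each received packet either attach the MPRQ buffer memory
+ * or copy the data out, tracking stride consumption per WQE.
+ */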
+ for (i = 0; i < pkts_n; ++i) {
+ uint16_t strd_cnt;
+ enum mlx5_rqx_code rxq_code;
+
+ if (rxq->consumed_strd == strd_n) {
+ /* Replace WQE if the buffer is still in use. */
+ mprq_buf_replace(rxq, rq_ci & wqe_mask);
+ /* Advance to the next WQE. */
+ rxq->consumed_strd = 0;
+ rq_ci++;
+ buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
+ }
+
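+ /*
+ * A zero packet length means the entry carries no data
+ * (e.g. a filler CQE): drop the mbuf and mark all strides
+ * of the current WQE as consumed.
+ */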
+ if (!elts[i]->pkt_len) {
+ rxq->consumed_strd = strd_n;
+ rte_pktmbuf_free_seg(elts[i]);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets -= 1;
+#endif
+ continue;
+ }
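+ /* Number of strides occupied by this packet, rounded up. */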
+ strd_cnt = (elts[i]->pkt_len / strd_sz) +
+ ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
+ rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
+ buf, rxq->consumed_strd, strd_cnt);
+ rxq->consumed_strd += strd_cnt;
+ if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets -= 1;
+ rxq->stats.ibytes -= elts[i]->pkt_len;
+#endif
+ /* Free the mbuf only after pkt_len was read for the stats. */
+ rte_pktmbuf_free_seg(elts[i]);
+ if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
+ ++rxq->stats.rx_nombuf;
+ break;
+ }
+ if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
+ ++rxq->stats.idropped;
+ continue;
+ }
+ }
+ pkts[copied++] = elts[i];
+ }
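+ /* Advance the consumer indexes by all processed entries, dropped included. */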
+ rxq->rq_pi += i;
+ rxq->cq_ci += i;
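+ /* Order all preceding writes before the CQ doorbell record update. */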
+ rte_io_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
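+ /* Update the RQ doorbell only if new WQEs were posted. */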
+ if (rq_ci != rxq->rq_ci) {
+ rxq->rq_ci = rq_ci;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ }
+ return copied;
+}
+