+int mlx5_rx_descriptor_status(void *, uint16_t);
+int mlx5_tx_descriptor_status(void *, uint16_t);
+
+/* Vectorized version of mlx5_rxtx.c */
+int priv_check_raw_vec_tx_support(struct priv *, struct rte_eth_dev *);
+int priv_check_vec_tx_support(struct priv *, struct rte_eth_dev *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
+int priv_check_vec_rx_support(struct priv *);
+uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
+uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
+uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
+
+/* mlx5_mr.c */
+
+void mlx5_mp2mr_iter(struct rte_mempool *, void *);
+struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
+ struct rte_mempool *, unsigned int);
+struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+ unsigned int);
+
+#ifndef NDEBUG
+/**
+ * Verify or set magic value in CQE.
+ *
+ * @param cqe
+ * Pointer to CQE.
+ *
+ * @return
+ *   0 the first time the CQE is checked (the magic value is then written),
+ *   1 if the CQE was already marked as seen.
+ */
+static inline int
+check_cqe_seen(volatile struct mlx5_cqe *cqe)
+{
+ static const uint8_t magic[] = "seen";
+ volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
+ int ret = 1;
+ unsigned int i;
+
+ for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
+ if (!ret || (*buf)[i] != magic[i]) {
+ ret = 0;
+ (*buf)[i] = magic[i];
+ }
+ return ret;
+}
+#endif /* NDEBUG */
+
+/**
+ * Check whether CQE is valid.
+ *
+ * @param cqe
+ * Pointer to CQE.
+ * @param cqes_n
+ * Size of completion queue.
+ * @param ci
+ * Consumer index.
+ *
+ * @return
+ * 0 on success, 1 on failure.
+ */
+static __rte_always_inline int
+check_cqe(volatile struct mlx5_cqe *cqe,
+ unsigned int cqes_n, const uint16_t ci)
+{
+ uint16_t idx = ci & cqes_n;
+ uint8_t op_own = cqe->op_own;
+ uint8_t op_owner = MLX5_CQE_OWNER(op_own);
+ uint8_t op_code = MLX5_CQE_OPCODE(op_own);
+
+ if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
+ return 1; /* No CQE. */
+#ifndef NDEBUG
+ if ((op_code == MLX5_CQE_RESP_ERR) ||
+ (op_code == MLX5_CQE_REQ_ERR)) {
+ volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
+ uint8_t syndrome = err_cqe->syndrome;
+
+ if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
+ (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
+ return 0;
+ if (!check_cqe_seen(cqe)) {
+ ERROR("unexpected CQE error %u (0x%02x)"
+ " syndrome 0x%02x",
+ op_code, op_code, syndrome);
+ rte_hexdump(stderr, "MLX5 Error CQE:",
+ (const void *)((uintptr_t)err_cqe),
+ sizeof(*err_cqe));
+ }
+ return 1;
+ } else if ((op_code != MLX5_CQE_RESP_SEND) &&
+ (op_code != MLX5_CQE_REQ)) {
+ if (!check_cqe_seen(cqe)) {
+ ERROR("unexpected CQE opcode %u (0x%02x)",
+ op_code, op_code);
+ rte_hexdump(stderr, "MLX5 CQE:",
+ (const void *)((uintptr_t)cqe),
+ sizeof(*cqe));
+ }
+ return 1;
+ }
+#endif /* NDEBUG */
+ return 0;
+}
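+
+/*
+ * Worked example (illustrative only, not part of the API): cqes_n is a power
+ * of two, so (ci & cqes_n) extracts the wrap-parity bit of the consumer
+ * index. With cqes_n = 256 and ci = 300, idx = 300 & 256 = 256 and
+ * !!idx = 1, hence the CQE is only accepted if hardware wrote it with the
+ * owner bit set to 1, which is the case on the second pass over the ring.
+ * mlx5_tx_complete() below shows the typical calling pattern.
+ */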
+
+/**
+ * Return the address of the WQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param ci
+ * WQE consumer index.
+ *
+ * @return
+ * WQE address.
+ */
+static inline uintptr_t *
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
+{
+ ci &= ((1 << txq->wqe_n) - 1);
+ return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
+}
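+
+/*
+ * Worked example (illustrative only): with txq->wqe_n = 9 the send ring
+ * holds 512 WQEs, so for ci = 515 the index wraps to 3 and the returned
+ * address is txq->wqes + 3 * MLX5_WQE_SIZE.
+ */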
+
+/**
+ * Manage TX completions.
+ *
+ * When sending a burst, mlx5_tx_burst() posts several WRs; this function
+ * polls the completion queue, frees the mbufs of the WQEs reported as
+ * completed and updates the CQ consumer index.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ */
+static __rte_always_inline void
+mlx5_tx_complete(struct mlx5_txq_data *txq)
+{
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
+ const unsigned int cqe_n = 1 << txq->cqe_n;
+ const unsigned int cqe_cnt = cqe_n - 1;
+ uint16_t elts_free = txq->elts_tail;
+ uint16_t elts_tail;
+ uint16_t cq_ci = txq->cq_ci;
+ volatile struct mlx5_cqe *cqe = NULL;
+ volatile struct mlx5_wqe_ctrl *ctrl;
+ struct rte_mbuf *m, *free[elts_n];
+ struct rte_mempool *pool = NULL;
+ unsigned int blk_n = 0;
+
+ cqe = &(*txq->cqes)[cq_ci & cqe_cnt];
+ if (unlikely(check_cqe(cqe, cqe_n, cq_ci)))
+ return;
+#ifndef NDEBUG
+ if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
+ (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
+ if (!check_cqe_seen(cqe)) {
+ ERROR("unexpected error CQE, TX stopped");
+ rte_hexdump(stderr, "MLX5 TXQ:",
+ (const void *)((uintptr_t)txq->wqes),
+ ((1 << txq->wqe_n) *
+ MLX5_WQE_SIZE));
+ }
+ return;
+ }
+#endif /* NDEBUG */
+ ++cq_ci;
+ txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
+ ctrl = (volatile struct mlx5_wqe_ctrl *)
+ tx_mlx5_wqe(txq, txq->wqe_pi);
+ elts_tail = ctrl->ctrl3;
+ assert((elts_tail & elts_m) < (1 << txq->wqe_n));
+ /* Free buffers. */
+ while (elts_free != elts_tail) {
+ m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
+ if (likely(m != NULL)) {
+ if (likely(m->pool == pool)) {
+ free[blk_n++] = m;
+ } else {
+ if (likely(pool != NULL))
+ rte_mempool_put_bulk(pool,
+ (void *)free,
+ blk_n);
+ free[0] = m;
+ pool = m->pool;
+ blk_n = 1;
+ }
+ }
+ }
+ if (blk_n)
+ rte_mempool_put_bulk(pool, (void *)free, blk_n);
+#ifndef NDEBUG
+ elts_free = txq->elts_tail;
+ /* Poisoning. */
+ while (elts_free != elts_tail) {
+ memset(&(*txq->elts)[elts_free & elts_m],
+ 0x66,
+ sizeof((*txq->elts)[elts_free & elts_m]));
+ ++elts_free;
+ }
+#endif
+ txq->cq_ci = cq_ci;
+ txq->elts_tail = elts_tail;
+ /* Update the consumer index. */
+ rte_compiler_barrier();
+ *txq->cq_db = rte_cpu_to_be_32(cq_ci);
+}
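+
+/*
+ * Illustrative sketch (hypothetical example_tx_burst, not part of this
+ * patch): burst routines typically call mlx5_tx_complete() first to reclaim
+ * completed buffers and free room in elts[] before building new WQEs:
+ *
+ *	uint16_t
+ *	example_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
+ *			 uint16_t pkts_n)
+ *	{
+ *		struct mlx5_txq_data *txq = dpdk_txq;
+ *
+ *		mlx5_tx_complete(txq);
+ *		... build WQEs for pkts, ring the doorbell, return count ...
+ *	}
+ */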
+
+/**
+ * Get Memory Pool (MP) from mbuf. If the mbuf is indirect, the pool of the
+ * direct mbuf it was cloned from is returned instead.
+ *
+ * @param buf
+ * Pointer to mbuf.
+ *
+ * @return
+ * Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+mlx5_tx_mb2mp(struct rte_mbuf *buf)
+{
+ if (unlikely(RTE_MBUF_INDIRECT(buf)))
+ return rte_mbuf_from_indirect(buf)->pool;
+ return buf->pool;
+}
+
+/**
+ * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
+ * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
+ * remove an entry first.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param[in] mb
+ *   Pointer to mbuf whose data address determines the Memory Region lkey
+ *   to return.
+ *
+ * @return
+ * mr->lkey on success, (uint32_t)-1 on failure.
+ */
+static __rte_always_inline uint32_t
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
+{
+ uint16_t i = txq->mr_cache_idx;
+ uintptr_t addr = rte_pktmbuf_mtod_offset(mb, uintptr_t, DATA_LEN(mb));
+ struct mlx5_mr *mr;
+
+ assert(i < RTE_DIM(txq->mp2mr));
+ if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+ return txq->mp2mr[i]->lkey;
+ for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
+ if (unlikely(txq->mp2mr[i]->mr == NULL)) {
+ /* Unknown MP, add a new MR for it. */
+ break;
+ }
+ if (txq->mp2mr[i]->start <= addr &&
+ txq->mp2mr[i]->end >= addr) {
+ assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+ assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
+ txq->mp2mr[i]->lkey);
+ txq->mr_cache_idx = i;
+ return txq->mp2mr[i]->lkey;
+ }
+ }
+ txq->mr_cache_idx = 0;
+ mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ /*
+ * Request the reference to use in this queue, the original one is
+ * kept by the control plane.
+ */
+ if (mr) {
+ rte_atomic32_inc(&mr->refcnt);
+ return mr->lkey;
+ }
+ return (uint32_t)-1;
+}
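+
+/*
+ * Illustrative use (not part of this patch): the returned lkey is stored in
+ * big-endian form (cf. the assertion in mlx5_tx_mb2mr()) and is typically
+ * written straight into a WQE data segment; dseg and buf are hypothetical
+ * variables, assuming a byte_count/lkey/addr data segment layout:
+ *
+ *	dseg->byte_count = rte_cpu_to_be_32(DATA_LEN(buf));
+ *	dseg->lkey = mlx5_tx_mb2mr(txq, buf);
+ *	dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+ */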
+
+/**
+ * Ring TX queue doorbell and flush the update if requested.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe
+ * Pointer to the last WQE posted in the NIC.
+ * @param cond
+ * Request for write memory barrier after BlueFlame update.
+ */
+static __rte_always_inline void
+mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
+ int cond)
+{
+ uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+ volatile uint64_t *src = ((volatile uint64_t *)wqe);
+
+ rte_io_wmb();
+ *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
+ /* Ensure ordering between DB record and BF copy. */
+ rte_wmb();
+ *dst = *src;
+ if (cond)
+ rte_wmb();
+}
+
+/**
+ * Ring the TX queue doorbell and flush the update with a write memory
+ * barrier.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe
+ * Pointer to the last WQE posted in the NIC.
+ */
+static __rte_always_inline void
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
+{
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
+}
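+
+/*
+ * Illustrative use (not part of this patch): once the last WQE of a burst
+ * has been written, the doorbell is rung with a pointer to it (hypothetical
+ * last_wqe variable):
+ *
+ *	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
+ *
+ * mlx5_tx_dbrec_cond_wmb(txq, last_wqe, 0) may be used instead when the
+ * trailing write memory barrier after the BlueFlame copy is not needed.
+ */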
+
+/**
+ * Convert mbuf checksum offload flags to WQE checksum flags (cs_flags).
+ *
+ * @param txq_data
+ * Pointer to the Tx queue.
+ * @param buf
+ * Pointer to the mbuf.
+ *
+ * @return
+ *   The converted cs_flags.
+ */
+static __rte_always_inline uint8_t
+txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
+{
+ uint8_t cs_flags = 0;
+
+ /* Should we enable HW CKSUM offload */
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM |
+ PKT_TX_OUTER_IP_CKSUM)) {
+ if (txq_data->tunnel_en &&
+ (buf->ol_flags &
+ (PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN))) {
+ cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ } else {
+ cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ }
+ }
+ return cs_flags;
+}
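+
+/*
+ * Illustrative use (not part of this patch): the returned value is meant for
+ * the cs_flags field of the WQE Ethernet segment; eseg and buf are
+ * hypothetical variables, assuming an Ethernet segment layout exposing
+ * cs_flags:
+ *
+ *	uint8_t cs_flags = txq_ol_cksum_to_cs(txq, buf);
+ *
+ *	eseg->cs_flags = cs_flags;
+ */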
+
+/**
+ * Count the number of contiguous single segment packets.
+ *
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ *
+ * @return
+ * Number of contiguous single segment packets.
+ */
+static __rte_always_inline unsigned int
+txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ unsigned int pos;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of contiguous single segment packets. */
+ for (pos = 0; pos < pkts_n; ++pos)
+ if (NB_SEGS(pkts[pos]) > 1)
+ break;
+ return pos;
+}
+
+/**
+ * Count the number of contiguous multi-segment packets.
+ *
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ *
+ * @return
+ * Number of contiguous multi-segment packets.
+ */
+static __rte_always_inline unsigned int
+txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ unsigned int pos;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of contiguous multi-segment packets. */
+ for (pos = 0; pos < pkts_n; ++pos)
+ if (NB_SEGS(pkts[pos]) == 1)
+ break;
+ return pos;
+}
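+
+/*
+ * Illustrative sketch (not part of this patch): a vectorized burst routine
+ * can use the two helpers above to split the packet array into alternating
+ * runs of multi-segment and single-segment packets; handle_multi_seg() and
+ * handle_single_seg() below are hypothetical senders returning the number of
+ * packets actually posted:
+ *
+ *	uint16_t nb_tx = 0;
+ *
+ *	while (nb_tx < pkts_n) {
+ *		uint16_t n, ret;
+ *
+ *		n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
+ *		if (n) {
+ *			ret = handle_multi_seg(txq, &pkts[nb_tx], n);
+ *			nb_tx += ret;
+ *			if (ret < n)
+ *				break;
+ *		}
+ *		n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
+ *		if (n) {
+ *			ret = handle_single_seg(txq, &pkts[nb_tx], n);
+ *			nb_tx += ret;
+ *			if (ret < n)
+ *				break;
+ *		}
+ *	}
+ */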