+/**
+ * Create a QP Verbs object.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param idx
+ * Queue index in DPDK Tx queue array.
+ *
+ * @return
+ * The QP Verbs object on success, NULL otherwise and rte_errno is set.
+ */
+static struct ibv_qp *
+mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq_data, struct mlx5_txq_ctrl, txq);
+ struct ibv_qp *qp_obj = NULL;
+ struct ibv_qp_init_attr_ex qp_attr = { 0 };
+ const int desc = 1 << txq_data->elts_n;
+
+ MLX5_ASSERT(txq_ctrl->obj->cq);
+ /* CQ to be associated with the send queue. */
+ qp_attr.send_cq = txq_ctrl->obj->cq;
+ /* CQ to be associated with the receive queue. */
+ qp_attr.recv_cq = txq_ctrl->obj->cq;
+ /* Max number of outstanding WRs. */
+ qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
+ priv->sh->device_attr.max_qp_wr : desc);
+ /*
+ * Max number of scatter/gather elements in a WR, must be 1 to prevent
+ * libmlx5 from trying to affect too much memory. TX gather is not
+ * impacted by the device_attr.max_sge limit and will still work
+ * properly.
+ */
+ qp_attr.cap.max_send_sge = 1;
+ qp_attr.qp_type = IBV_QPT_RAW_PACKET;
+ /* Do *NOT* enable this, completion events are managed per Tx burst. */
+ qp_attr.sq_sig_all = 0;
+ qp_attr.pd = priv->sh->pd;
+ qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
+ if (txq_data->inlen_send)
+ qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
+ if (txq_data->tso_en) {
+ qp_attr.max_tso_header = txq_ctrl->max_tso_header;
+ qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
+ }
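+ /* Create the QP object through the Verbs glue layer. */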
+ qp_obj = mlx5_glue->create_qp_ex(priv->sh->ctx, &qp_attr);
+ if (qp_obj == NULL) {
+ DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
+ dev->data->port_id, idx);
+ rte_errno = errno;
+ }
+ return qp_obj;
+}
+
+/**
+ * Create the Tx queue Verbs object.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param idx
+ * Queue index in DPDK Tx queue array.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq_data, struct mlx5_txq_ctrl, txq);
+ struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
+ unsigned int cqe_n;
+ struct mlx5dv_qp qp;
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_obj obj;
+ const int desc = 1 << txq_data->elts_n;
+ int ret = 0;
+
+ MLX5_ASSERT(txq_data);
+ MLX5_ASSERT(txq_obj);
+ txq_obj->txq_ctrl = txq_ctrl;
+ if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
+ DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
+ "must never be set.", dev->data->port_id);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
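+ /*
+ * Size the CQ to hold one completion per MLX5_TX_COMP_THRESH
+ * descriptors, plus some headroom for the inline completion divisor.
+ */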
+ cqe_n = desc / MLX5_TX_COMP_THRESH +
+ 1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
+ txq_obj->cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
+ if (txq_obj->cq == NULL) {
+ DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
+ dev->data->port_id, idx);
+ rte_errno = errno;
+ goto error;
+ }
+ txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
+ if (txq_obj->qp == NULL) {
+ /* rte_errno was set by mlx5_txq_ibv_qp_create(). */
+ goto error;
+ }
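+ /* Move the QP from RESET to READY state so it can transmit. */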
+ ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
+ (uint8_t)priv->dev_port);
+ if (ret) {
+ DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
+ dev->data->port_id, idx);
+ rte_errno = errno;
+ goto error;
+ }
+ qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ /* If using DevX, an additional mask is needed to read the tisn value. */
+ if (priv->sh->devx && !priv->sh->tdn)
+ qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
+#endif
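+ /* Query the low-level CQ and QP layouts through Direct Verbs. */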
+ obj.cq.in = txq_obj->cq;
+ obj.cq.out = &cq_info;
+ obj.qp.in = txq_obj->qp;
+ obj.qp.out = &qp;
+ ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
+ if (ret != 0) {
+ rte_errno = errno;
+ goto error;
+ }
+ if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+ DRV_LOG(ERR,
+ "Port %u wrong MLX5_CQE_SIZE environment variable"
+ " value: it should be set to %u.",
+ dev->data->port_id, RTE_CACHE_LINE_SIZE);
+ rte_errno = EINVAL;
+ goto error;
+ }
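+ /* Derive queue sizes and wrap-around masks from the reported counts. */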
+ txq_data->cqe_n = log2above(cq_info.cqe_cnt);
+ txq_data->cqe_s = 1 << txq_data->cqe_n;
+ txq_data->cqe_m = txq_data->cqe_s - 1;
+ txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
+ txq_data->wqes = qp.sq.buf;
+ txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
+ txq_data->wqe_s = 1 << txq_data->wqe_n;
+ txq_data->wqe_m = txq_data->wqe_s - 1;
+ txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
+ txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
+ txq_data->cq_db = cq_info.dbrec;
+ txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
+ txq_data->cq_ci = 0;
+ txq_data->cq_pi = 0;
+ txq_data->wqe_ci = 0;
+ txq_data->wqe_pi = 0;
+ txq_data->wqe_comp = 0;
+ txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ /*
+ * If using DevX, query and store the TIS transport domain value.
+ * This is done once per port.
+ * The value will be used on Rx, when creating the matching TIR.
+ */
+ if (priv->sh->devx && !priv->sh->tdn) {
+ ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
+ &priv->sh->tdn);
+ if (ret) {
+ DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
+ "transport domain.", dev->data->port_id, idx);
+ rte_errno = EINVAL;
+ goto error;
+ } else {
+ DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
+ "transport domain %d.", dev->data->port_id,
+ idx, qp.tisn, priv->sh->tdn);
+ }
+ }
+#endif
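+ /* BlueFlame register used for doorbell writes. */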
+ txq_ctrl->bf_reg = qp.bf.reg;
+ if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+ txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
+ DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
+ dev->data->port_id, txq_ctrl->uar_mmap_offset);
+ } else {
+ DRV_LOG(ERR,
+ "Port %u failed to retrieve UAR info, invalid"
+ " libmlx5.so",
+ dev->data->port_id);
+ rte_errno = EINVAL;
+ goto error;
+ }
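+ /* Initialize UAR access (doorbell mapping) for this Tx queue. */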
+ txq_uar_init(txq_ctrl);
+ dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
+ return 0;
+error:
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ if (txq_obj->cq)
+ claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+ if (txq_obj->qp)
+ claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
+ rte_errno = ret; /* Restore rte_errno. */
+ return -rte_errno;
+}
+
+/**
+ * Create the dummy QP with minimal resources for loopback.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev)
+{
+#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
+ struct ibv_context *ctx = sh->ctx;
+ struct mlx5dv_qp_init_attr qp_init_attr = {0};
+ struct {
+ struct ibv_cq_init_attr_ex ibv;
+ struct mlx5dv_cq_init_attr mlx5;
+ } cq_attr = {{0}};
+
+ if (dev->data->dev_conf.lpbk_mode) {
+ /* Allow packets sent from NIC to loop back without source MAC check. */
+ qp_init_attr.comp_mask |=
+ MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
+ qp_init_attr.create_flags |=
+ MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
+ } else {
+ return 0;
+ }
+ /* Only the refcnt needs checking, it is 0 right after "sh" allocation. */
+ if (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
+ MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);
+ priv->lb_used = 1;
+ return 0;
+ }
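+ /* A single CQE suffices, the dummy QP never carries real traffic. */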
+ cq_attr.ibv = (struct ibv_cq_init_attr_ex){
+ .cqe = 1,
+ .channel = NULL,
+ .comp_mask = 0,
+ };
+ cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
+ .comp_mask = 0,
+ };
+ /* Only CQ is needed, no WQ(RQ) is required in this case. */
+ sh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(ctx,
+ &cq_attr.ibv,
+ &cq_attr.mlx5));
+ if (!sh->self_lb.ibv_cq) {
+ DRV_LOG(ERR, "Port %u cannot allocate CQ for loopback.",
+ dev->data->port_id);
+ rte_errno = errno;
+ goto error;
+ }
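+ /*
+ * Raw packet QP with a single receive WR slot, presumably the
+ * minimum the Verbs layer accepts for a dummy queue.
+ */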
+ sh->self_lb.qp = mlx5_glue->dv_create_qp(ctx,
+ &(struct ibv_qp_init_attr_ex){
+ .qp_type = IBV_QPT_RAW_PACKET,
+ .comp_mask = IBV_QP_INIT_ATTR_PD,
+ .pd = sh->pd,
+ .send_cq = sh->self_lb.ibv_cq,
+ .recv_cq = sh->self_lb.ibv_cq,
+ .cap.max_recv_wr = 1,
+ },
+ &qp_init_attr);
+ if (!sh->self_lb.qp) {
+ DRV_LOG(DEBUG, "Port %u cannot allocate QP for loopback.",
+ dev->data->port_id);
+ rte_errno = errno;
+ goto error;
+ }
+ priv->lb_used = 1;
+ return 0;
+error:
+ if (sh->self_lb.ibv_cq) {
+ claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
+ sh->self_lb.ibv_cq = NULL;
+ }
+ (void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);
+ return -rte_errno;
+#else
+ RTE_SET_USED(dev);
+ return 0;
+#endif
+}
+
+/**
+ * Release the dummy queue resources for loopback.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ */
+void
+mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev)
+{
+#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
+
+ if (!priv->lb_used)
+ return;
+ MLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt, __ATOMIC_RELAXED));
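+ /* Destroy the dummy QP/CQ only when the last reference is dropped. */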
+ if (!(__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
+ if (sh->self_lb.qp) {
+ claim_zero(mlx5_glue->destroy_qp(sh->self_lb.qp));
+ sh->self_lb.qp = NULL;
+ }
+ if (sh->self_lb.ibv_cq) {
+ claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
+ sh->self_lb.ibv_cq = NULL;
+ }
+ }
+ priv->lb_used = 0;
+#else
+ RTE_SET_USED(dev);
+ return;
+#endif
+}
+
+/**
+ * Release a Tx Verbs queue object.
+ *
+ * @param txq_obj
+ * Verbs Tx queue object.
+ */
+void
+mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
+{
+ MLX5_ASSERT(txq_obj);
+ claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
+ claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
+}
+