+ * @param rxq
+ * Pointer to receive queue structure.
+ * @param sq_n_rxq
+ * Sequence number per receive queue .
+ */
+static inline void
+mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
+{
+ int sq_n = 0;
+ uint32_t doorbell_hi;
+ uint64_t doorbell;
+ void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
+
+ sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
+ doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
+ doorbell = (uint64_t)doorbell_hi << 32;
+ doorbell |= rxq->cqn;
+ rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
+ rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+}
+
+/**
+ * DPDK callback for Rx queue interrupt enable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rx_queue_id
+ * Rx queue number.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->irq) {
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
+ if (!rxq_ibv) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
+ mlx5_rxq_ibv_release(rxq_ibv);
+ }
+ return 0;
+}
+
+/**
+ * DPDK callback for Rx queue interrupt disable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rx_queue_id
+ * Rx queue number.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_ibv *rxq_ibv = NULL;
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+ int ret;
+
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (!rxq_ctrl->irq)
+ return 0;
+ rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
+ if (!rxq_ibv) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
+ if (ret || ev_cq != rxq_ibv->cq) {
+ rte_errno = EINVAL;
+ goto exit;
+ }
+ rxq_data->cq_arm_sn++;
+ mlx5_glue->ack_cq_events(rxq_ibv->cq, 1);
+ return 0;
+exit:
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ if (rxq_ibv)
+ mlx5_rxq_ibv_release(rxq_ibv);
+ WARN("port %u unable to disable interrupt on Rx queue %d",
+ dev->data->port_id, rx_queue_id);
+ rte_errno = ret; /* Restore rte_errno. */
+ return -rte_errno;
+}
+
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param idx
+ * Queue index in DPDK Rx queue array
+ *
+ * @return
+ * The Verbs object initialised, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_rxq_ibv *
+mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod;
+ union {
+ struct {
+ struct ibv_cq_init_attr_ex ibv;
+ struct mlx5dv_cq_init_attr mlx5;
+ } cq;
+ struct ibv_wq_init_attr wq;
+ struct ibv_cq_ex cq_attr;
+ } attr;
+ unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+ struct mlx5_rxq_ibv *tmpl;
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_rwq rwq;
+ unsigned int i;
+ int ret = 0;
+ struct mlx5dv_obj obj;
+ struct mlx5_dev_config *config = &priv->config;
+
+ assert(rxq_data);
+ assert(!rxq_ctrl->ibv);
+ priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
+ priv->verbs_alloc_ctx.obj = rxq_ctrl;
+ tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+ rxq_ctrl->socket);
+ if (!tmpl) {
+ ERROR("port %u Rx queue %u cannot allocate verbs resources",
+ dev->data->port_id, rxq_ctrl->idx);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ tmpl->rxq_ctrl = rxq_ctrl;
+ /* Use the entire RX mempool as the memory region. */
+ tmpl->mr = mlx5_mr_get(dev, rxq_data->mp);
+ if (!tmpl->mr) {
+ tmpl->mr = mlx5_mr_new(dev, rxq_data->mp);
+ if (!tmpl->mr) {
+ ERROR("port %u: memory region creation failure",
+ dev->data->port_id);
+ goto error;
+ }
+ }
+ if (rxq_ctrl->irq) {
+ tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx);
+ if (!tmpl->channel) {
+ ERROR("port %u: comp channel creation failure",
+ dev->data->port_id);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ }
+ attr.cq.ibv = (struct ibv_cq_init_attr_ex){
+ .cqe = cqe_n,
+ .channel = tmpl->channel,
+ .comp_mask = 0,
+ };
+ attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){
+ .comp_mask = 0,
+ };
+ if (config->cqe_comp && !rxq_data->hw_timestamp) {
+ attr.cq.mlx5.comp_mask |=
+ MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
+ /*
+ * For vectorized Rx, it must not be doubled in order to
+ * make cq_ci and rq_ci aligned.
+ */
+ if (mlx5_rxq_check_vec_support(rxq_data) < 0)
+ attr.cq.ibv.cqe *= 2;
+ } else if (config->cqe_comp && rxq_data->hw_timestamp) {
+ DEBUG("port %u Rx CQE compression is disabled for HW timestamp",
+ dev->data->port_id);
+ }
+ tmpl->cq = mlx5_glue->cq_ex_to_cq
+ (mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv,
+ &attr.cq.mlx5));
+ if (tmpl->cq == NULL) {
+ ERROR("port %u Rx queue %u CQ creation failure",
+ dev->data->port_id, idx);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ DEBUG("port %u priv->device_attr.max_qp_wr is %d",
+ dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
+ DEBUG("port %u priv->device_attr.max_sge is %d",
+ dev->data->port_id, priv->device_attr.orig_attr.max_sge);
+ attr.wq = (struct ibv_wq_init_attr){
+ .wq_context = NULL, /* Could be useful in the future. */
+ .wq_type = IBV_WQT_RQ,
+ /* Max number of outstanding WRs. */
+ .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+ /* Max number of scatter/gather elements in a WR. */
+ .max_sge = 1 << rxq_data->sges_n,
+ .pd = priv->pd,
+ .cq = tmpl->cq,
+ .comp_mask =
+ IBV_WQ_FLAGS_CVLAN_STRIPPING |
+ 0,
+ .create_flags = (rxq_data->vlan_strip ?
+ IBV_WQ_FLAGS_CVLAN_STRIPPING :
+ 0),
+ };
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (rxq_data->crc_present) {
+ attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+ if (config->hw_padding) {
+ attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#endif
+ tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq);
+ if (tmpl->wq == NULL) {
+ ERROR("port %u Rx queue %u WQ creation failure",
+ dev->data->port_id, idx);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ /*
+ * Make sure number of WRs*SGEs match expectations since a queue
+ * cannot allocate more than "desc" buffers.
+ */
+ if (((int)attr.wq.max_wr !=
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
+ ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+ ERROR("port %u Rx queue %u requested %u*%u but got %u*%u"
+ " WRs*SGEs",
+ dev->data->port_id, idx,
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+ (1 << rxq_data->sges_n),
+ attr.wq.max_wr, attr.wq.max_sge);
+ rte_errno = EINVAL;
+ goto error;
+ }
+ /* Change queue state to ready. */
+ mod = (struct ibv_wq_attr){
+ .attr_mask = IBV_WQ_ATTR_STATE,
+ .wq_state = IBV_WQS_RDY,
+ };
+ ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
+ if (ret) {
+ ERROR("port %u Rx queue %u WQ state to IBV_WQS_RDY failed",
+ dev->data->port_id, idx);
+ rte_errno = ret;
+ goto error;
+ }
+ obj.cq.in = tmpl->cq;
+ obj.cq.out = &cq_info;
+ obj.rwq.in = tmpl->wq;
+ obj.rwq.out = &rwq;
+ ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
+ if (ret) {
+ rte_errno = ret;
+ goto error;
+ }
+ if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+ ERROR("port %u wrong MLX5_CQE_SIZE environment variable value: "
+ "it should be set to %u", dev->data->port_id,
+ RTE_CACHE_LINE_SIZE);
+ rte_errno = EINVAL;
+ goto error;
+ }
+ /* Fill the rings. */
+ rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
+ (uintptr_t)rwq.buf;
+ for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+ struct rte_mbuf *buf = (*rxq_data->elts)[i];
+ volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+ *scat = (struct mlx5_wqe_data_seg){
+ .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t)),
+ .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
+ .lkey = tmpl->mr->lkey,
+ };
+ }
+ rxq_data->rq_db = rwq.dbrec;
+ rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
+ rxq_data->cq_ci = 0;
+ rxq_data->rq_ci = 0;
+ rxq_data->rq_pi = 0;
+ rxq_data->zip = (struct rxq_zip){
+ .ai = 0,
+ };
+ rxq_data->cq_db = cq_info.dbrec;
+ rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
+ rxq_data->cq_uar = cq_info.cq_uar;
+ rxq_data->cqn = cq_info.cqn;
+ rxq_data->cq_arm_sn = 0;
+ /* Update doorbell counter. */
+ rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+ rte_wmb();
+ *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
+ DEBUG("port %u rxq %u updated with %p", dev->data->port_id, idx,
+ (void *)&tmpl);
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("port %u Verbs Rx queue %u: refcnt %d", dev->data->port_id, idx,
+ rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+ priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
+ return tmpl;
+error:
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ if (tmpl->wq)
+ claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
+ if (tmpl->cq)
+ claim_zero(mlx5_glue->destroy_cq(tmpl->cq));
+ if (tmpl->channel)
+ claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel));
+ if (tmpl->mr)
+ mlx5_mr_release(tmpl->mr);
+ priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
+ rte_errno = ret; /* Restore rte_errno. */
+ return NULL;
+}
+
+/**
+ * Get an Rx queue Verbs object.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param idx
+ * Queue index in DPDK Rx queue array
+ *
+ * @return
+ * The Verbs object if it exists.
+ */
+struct mlx5_rxq_ibv *
+mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (idx >= priv->rxqs_n)
+ return NULL;
+ if (!rxq_data)
+ return NULL;
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->ibv) {
+ mlx5_mr_get(dev, rxq_data->mp);
+ rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
+ DEBUG("port %u Verbs Rx queue %u: refcnt %d",
+ dev->data->port_id, rxq_ctrl->idx,
+ rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
+ }
+ return rxq_ctrl->ibv;
+}
+
+/**
+ * Release an Rx verbs queue object.
+ *
+ * @param rxq_ibv
+ * Verbs Rx queue object.
+ *
+ * @return
+ * 1 while a reference on it exists, 0 when freed.
+ */
+int
+mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
+{
+ int ret;
+
+ assert(rxq_ibv);
+ assert(rxq_ibv->wq);
+ assert(rxq_ibv->cq);
+ assert(rxq_ibv->mr);
+ ret = mlx5_mr_release(rxq_ibv->mr);
+ if (!ret)
+ rxq_ibv->mr = NULL;
+ DEBUG("port %u Verbs Rx queue %u: refcnt %d",
+ rxq_ibv->rxq_ctrl->priv->dev->data->port_id,
+ rxq_ibv->rxq_ctrl->idx, rte_atomic32_read(&rxq_ibv->refcnt));
+ if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
+ rxq_free_elts(rxq_ibv->rxq_ctrl);
+ claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq));
+ claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq));
+ if (rxq_ibv->channel)
+ claim_zero(mlx5_glue->destroy_comp_channel
+ (rxq_ibv->channel));
+ LIST_REMOVE(rxq_ibv, next);
+ rte_free(rxq_ibv);
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * Verify the Verbs Rx queue list is empty
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * The number of object not released.
+ */
+int
+mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+ int ret = 0;
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
+ DEBUG("port %u Verbs Rx queue %u still referenced",
+ dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param rxq_ibv
+ * Verbs Rx queue object.
+ */
+int
+mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv)
+{
+ assert(rxq_ibv);
+ return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
+}
+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param idx
+ * TX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.