+ mlx5_rxq_release(dev, idx);
+ rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
+ if (!rxq_ctrl) {
+ DRV_LOG(ERR, "port %u unable to allocate queue index %u",
+ dev->data->port_id, idx);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
+ dev->data->port_id, idx);
+ (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+ return 0;
+}
+
+/**
+ * DPDK callback to release a RX queue.
+ *
+ * @param dpdk_rxq
+ * Generic RX queue pointer.
+ */
+void
+mlx5_rx_queue_release(void *dpdk_rxq)
+{
+ struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct priv *priv;
+
+ if (rxq == NULL)
+ return;
+ rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ priv = rxq_ctrl->priv;
+ if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+ rte_panic("port %u Rx queue %u is still used by a flow and"
+ " cannot be removed\n",
+ PORT_ID(priv), rxq_ctrl->idx);
+ mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+}
+
+/**
+ * Allocate queue vector and fill epoll fd list for Rx interrupts.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+ unsigned int i;
+ unsigned int rxqs_n = priv->rxqs_n;
+ unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+ unsigned int count = 0;
+ struct rte_intr_handle *intr_handle = dev->intr_handle;
+
+ if (!dev->data->dev_conf.intr_conf.rxq)
+ return 0;
+ mlx5_rx_intr_vec_disable(dev);
+ intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
+ if (intr_handle->intr_vec == NULL) {
+ DRV_LOG(ERR,
+ "port %u failed to allocate memory for interrupt"
+ " vector, Rx interrupts will not be supported",
+ dev->data->port_id);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ intr_handle->type = RTE_INTR_HANDLE_EXT;
+ for (i = 0; i != n; ++i) {
+ /* This rxq ibv must not be released in this function. */
+ struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i);
+ int fd;
+ int flags;
+ int rc;
+
+ /* Skip queues that cannot request interrupts. */
+ if (!rxq_ibv || !rxq_ibv->channel) {
+ /* Use invalid intr_vec[] index to disable entry. */
+ intr_handle->intr_vec[i] =
+ RTE_INTR_VEC_RXTX_OFFSET +
+ RTE_MAX_RXTX_INTR_VEC_ID;
+ continue;
+ }
+ if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
+ DRV_LOG(ERR,
+ "port %u too many Rx queues for interrupt"
+ " vector size (%d), Rx interrupts cannot be"
+ " enabled",
+ dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
+ mlx5_rx_intr_vec_disable(dev);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ fd = rxq_ibv->channel->fd;
+ flags = fcntl(fd, F_GETFL);
+ rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ if (rc < 0) {
+ rte_errno = errno;
+ DRV_LOG(ERR,
+ "port %u failed to make Rx interrupt file"
+ " descriptor %d non-blocking for queue index"
+ " %d",
+ dev->data->port_id, fd, i);
+ mlx5_rx_intr_vec_disable(dev);
+ return -rte_errno;
+ }
+ intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
+ intr_handle->efds[count] = fd;
+ count++;
+ }
+ if (!count)
+ mlx5_rx_intr_vec_disable(dev);
+ else
+ intr_handle->nb_efd = count;
+ return 0;
+}
+
+/**
+ * Clean up Rx interrupts handler.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ */
+void
+mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_intr_handle *intr_handle = dev->intr_handle;
+ unsigned int i;
+ unsigned int rxqs_n = priv->rxqs_n;
+ unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+ if (!dev->data->dev_conf.intr_conf.rxq)
+ return;
+ if (!intr_handle->intr_vec)
+ goto free;
+ for (i = 0; i != n; ++i) {
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_data *rxq_data;
+
+ if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
+ RTE_MAX_RXTX_INTR_VEC_ID)
+ continue;
+ /**
+ * Need to access directly the queue to release the reference
+ * kept in priv_rx_intr_vec_enable().
+ */
+ rxq_data = (*priv->rxqs)[i];
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ mlx5_rxq_ibv_release(rxq_ctrl->ibv);
+ }
+free:
+ rte_intr_free_epoll_fd(intr_handle);
+ if (intr_handle->intr_vec)
+ free(intr_handle->intr_vec);
+ intr_handle->nb_efd = 0;
+ intr_handle->intr_vec = NULL;
+}
+
+/**
+ * MLX5 CQ notification .
+ *
+ * @param rxq
+ * Pointer to receive queue structure.
+ * @param sq_n_rxq
+ * Sequence number per receive queue .
+ */
+static inline void
+mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
+{
+ int sq_n = 0;
+ uint32_t doorbell_hi;
+ uint64_t doorbell;
+ void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
+
+ sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
+ doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
+ doorbell = (uint64_t)doorbell_hi << 32;
+ doorbell |= rxq->cqn;
+ rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
+ rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+}
+
+/**
+ * DPDK callback for Rx queue interrupt enable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rx_queue_id
+ * Rx queue number.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->irq) {
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
+ if (!rxq_ibv) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
+ mlx5_rxq_ibv_release(rxq_ibv);
+ }
+ return 0;
+}
+
+/**
+ * DPDK callback for Rx queue interrupt disable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rx_queue_id
+ * Rx queue number.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_ibv *rxq_ibv = NULL;
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+ int ret;
+
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (!rxq_ctrl->irq)
+ return 0;
+ rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
+ if (!rxq_ibv) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
+ if (ret || ev_cq != rxq_ibv->cq) {
+ rte_errno = EINVAL;
+ goto exit;
+ }
+ rxq_data->cq_arm_sn++;
+ mlx5_glue->ack_cq_events(rxq_ibv->cq, 1);
+ return 0;
+exit:
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ if (rxq_ibv)
+ mlx5_rxq_ibv_release(rxq_ibv);
+ DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
+ dev->data->port_id, rx_queue_id);
+ rte_errno = ret; /* Restore rte_errno. */
+ return -rte_errno;
+}
+
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param idx
+ * Queue index in DPDK Rx queue array
+ *
+ * @return
+ * The Verbs object initialised, NULL otherwise and rte_errno is set.
+ */
+struct mlx5_rxq_ibv *
+mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod;
+ union {
+ struct {
+ struct ibv_cq_init_attr_ex ibv;
+ struct mlx5dv_cq_init_attr mlx5;
+ } cq;
+ struct ibv_wq_init_attr wq;
+ struct ibv_cq_ex cq_attr;
+ } attr;
+ unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+ struct mlx5_rxq_ibv *tmpl;
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_rwq rwq;
+ unsigned int i;
+ int ret = 0;
+ struct mlx5dv_obj obj;
+ struct mlx5_dev_config *config = &priv->config;
+
+ assert(rxq_data);
+ assert(!rxq_ctrl->ibv);
+ priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
+ priv->verbs_alloc_ctx.obj = rxq_ctrl;
+ tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+ rxq_ctrl->socket);
+ if (!tmpl) {
+ DRV_LOG(ERR,
+ "port %u Rx queue %u cannot allocate verbs resources",
+ dev->data->port_id, rxq_ctrl->idx);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ tmpl->rxq_ctrl = rxq_ctrl;
+ /* Use the entire RX mempool as the memory region. */
+ tmpl->mr = mlx5_mr_get(dev, rxq_data->mp);
+ if (!tmpl->mr) {
+ tmpl->mr = mlx5_mr_new(dev, rxq_data->mp);
+ if (!tmpl->mr) {
+ DRV_LOG(ERR, "port %u: memeroy region creation failure",
+ dev->data->port_id);
+ goto error;
+ }
+ }
+ if (rxq_ctrl->irq) {
+ tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx);
+ if (!tmpl->channel) {
+ DRV_LOG(ERR, "port %u: comp channel creation failure",
+ dev->data->port_id);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ }
+ attr.cq.ibv = (struct ibv_cq_init_attr_ex){
+ .cqe = cqe_n,
+ .channel = tmpl->channel,
+ .comp_mask = 0,
+ };
+ attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){
+ .comp_mask = 0,
+ };
+ if (config->cqe_comp && !rxq_data->hw_timestamp) {
+ attr.cq.mlx5.comp_mask |=
+ MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
+ /*
+ * For vectorized Rx, it must not be doubled in order to
+ * make cq_ci and rq_ci aligned.
+ */
+ if (mlx5_rxq_check_vec_support(rxq_data) < 0)
+ attr.cq.ibv.cqe *= 2;
+ } else if (config->cqe_comp && rxq_data->hw_timestamp) {
+ DRV_LOG(DEBUG,
+ "port %u Rx CQE compression is disabled for HW"
+ " timestamp",
+ dev->data->port_id);
+ }
+ tmpl->cq = mlx5_glue->cq_ex_to_cq
+ (mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv,
+ &attr.cq.mlx5));
+ if (tmpl->cq == NULL) {
+ DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure",
+ dev->data->port_id, idx);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d",
+ dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
+ DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
+ dev->data->port_id, priv->device_attr.orig_attr.max_sge);
+ attr.wq = (struct ibv_wq_init_attr){
+ .wq_context = NULL, /* Could be useful in the future. */
+ .wq_type = IBV_WQT_RQ,
+ /* Max number of outstanding WRs. */
+ .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+ /* Max number of scatter/gather elements in a WR. */
+ .max_sge = 1 << rxq_data->sges_n,
+ .pd = priv->pd,
+ .cq = tmpl->cq,
+ .comp_mask =
+ IBV_WQ_FLAGS_CVLAN_STRIPPING |
+ 0,
+ .create_flags = (rxq_data->vlan_strip ?
+ IBV_WQ_FLAGS_CVLAN_STRIPPING :
+ 0),
+ };
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (rxq_data->crc_present) {
+ attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+ if (config->hw_padding) {
+ attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#endif
+ tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq);
+ if (tmpl->wq == NULL) {
+ DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
+ dev->data->port_id, idx);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ /*
+ * Make sure number of WRs*SGEs match expectations since a queue
+ * cannot allocate more than "desc" buffers.
+ */
+ if (((int)attr.wq.max_wr !=
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
+ ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+ DRV_LOG(ERR,
+ "port %u Rx queue %u requested %u*%u but got %u*%u"
+ " WRs*SGEs",
+ dev->data->port_id, idx,
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+ (1 << rxq_data->sges_n),
+ attr.wq.max_wr, attr.wq.max_sge);
+ rte_errno = EINVAL;
+ goto error;