From: Nélio Laranjeiro Date: Mon, 9 Oct 2017 14:44:46 +0000 (+0200) Subject: net/mlx5: separate DPDK from verbs Rx queue objects X-Git-Tag: spdx-start~1524 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=09cb5b581762696d5db11b95111a53c8cc32354f;p=dpdk.git net/mlx5: separate DPDK from verbs Rx queue objects Move verbs objects to their own functions to allocate/release them independently from the DPDK queue. At the same time a reference counter is added to help with issue detection when the queue is being released but is still in use somewhere else (flows for instance). Signed-off-by: Nelio Laranjeiro Acked-by: Yongseok Koh --- diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index b658b2ba64..ed77351cc2 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -253,6 +253,9 @@ mlx5_dev_close(struct rte_eth_dev *dev) if (priv->reta_idx != NULL) rte_free(priv->reta_idx); priv_socket_uninit(priv); + ret = mlx5_priv_rxq_ibv_verify(priv); + if (ret) + WARN("%p: some Verbs Rx queue still remain", (void *)priv); ret = priv_flow_verify(priv); if (ret) WARN("%p: some flows still remain", (void *)priv); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index f5637229d9..48c0c8eeff 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -147,6 +147,7 @@ struct priv { struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */ TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */ LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */ + LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */ uint32_t link_speed_capa; /* Link speed capabilities. */ struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ rte_spinlock_t lock; /* Lock for control functions. 
*/ @@ -290,7 +291,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *); int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *); int priv_flow_start(struct priv *); void priv_flow_stop(struct priv *); -int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *); int priv_flow_verify(struct priv *); /* mlx5_socket.c */ diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 193a90bc1a..362ec91cfe 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -95,11 +95,11 @@ struct rte_flow { struct ibv_flow *ibv_flow; /**< Verbs flow. */ struct ibv_wq *wq; /**< Verbs work queue. */ struct ibv_cq *cq; /**< Verbs completion queue. */ - uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */ uint32_t mark:1; /**< Set if the flow is marked. */ uint32_t drop:1; /**< Drop queue. */ uint64_t hash_fields; /**< Fields that participate in the hash. */ - struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */ + uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */ + uint16_t queues_n; /**< Number of queues in the list. */ }; /** Static initializer for items. 
*/ @@ -1096,23 +1096,21 @@ priv_flow_create_action_queue(struct priv *priv, assert(priv->pd); assert(priv->ctx); assert(!flow->actions.drop); - rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) + - sizeof(*rte_flow->rxqs) * flow->actions.queues_n, - 0); + rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0); if (!rte_flow) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "cannot allocate flow memory"); return NULL; } for (i = 0; i < flow->actions.queues_n; ++i) { - struct mlx5_rxq_ctrl *rxq; - - rxq = container_of((*priv->rxqs)[flow->actions.queues[i]], - struct mlx5_rxq_ctrl, rxq); - wqs[i] = rxq->wq; - rte_flow->rxqs[i] = &rxq->rxq; - ++rte_flow->rxqs_n; - rxq->rxq.mark |= flow->actions.mark; + struct mlx5_rxq_ibv *rxq_ibv = + mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]); + + wqs[i] = rxq_ibv->wq; + rte_flow->queues[i] = flow->actions.queues[i]; + ++rte_flow->queues_n; + (*priv->rxqs)[flow->actions.queues[i]]->mark |= + flow->actions.mark; } /* finalise indirection table. */ for (j = 0; i < wqs_n; ++i, ++j) { @@ -1290,6 +1288,8 @@ static void priv_flow_destroy(struct priv *priv, struct rte_flow *flow) { + unsigned int i; + TAILQ_REMOVE(&priv->flows, flow, next); if (flow->ibv_flow) claim_zero(ibv_destroy_flow(flow->ibv_flow)); @@ -1299,37 +1299,33 @@ priv_flow_destroy(struct priv *priv, claim_zero(ibv_destroy_qp(flow->qp)); if (flow->ind_table) claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table)); - if (flow->mark) { + for (i = 0; i != flow->queues_n; ++i) { struct rte_flow *tmp; - struct mlx5_rxq_data *rxq; - uint32_t mark_n = 0; - uint32_t queue_n; + struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); /* * To remove the mark from the queue, the queue must not be * present in any other marked flow (RSS or not). 
*/ - for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) { - rxq = flow->rxqs[queue_n]; - for (tmp = TAILQ_FIRST(&priv->flows); - tmp; - tmp = TAILQ_NEXT(tmp, next)) { - uint32_t tqueue_n; + if (flow->mark) { + int mark = 0; + + TAILQ_FOREACH(tmp, &priv->flows, next) { + unsigned int j; if (tmp->drop) continue; - for (tqueue_n = 0; - tqueue_n < tmp->rxqs_n; - ++tqueue_n) { - struct mlx5_rxq_data *trxq; - - trxq = tmp->rxqs[tqueue_n]; - if (rxq == trxq) - ++mark_n; - } + if (!tmp->mark) + continue; + for (j = 0; (j != tmp->queues_n) && !mark; j++) + if (tmp->queues[j] == flow->queues[i]) + mark = 1; } - rxq->mark = !!mark_n; + rxq_data->mark = mark; } + mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv); } free: rte_free(flow->ibv_attr); @@ -1523,8 +1519,8 @@ priv_flow_stop(struct priv *priv) if (flow->mark) { unsigned int n; - for (n = 0; n < flow->rxqs_n; ++n) - flow->rxqs[n]->mark = 0; + for (n = 0; n < flow->queues_n; ++n) + (*priv->rxqs)[flow->queues[n]]->mark = 0; } DEBUG("Flow %p removed", (void *)flow); } @@ -1566,39 +1562,8 @@ priv_flow_start(struct priv *priv) if (flow->mark) { unsigned int n; - for (n = 0; n < flow->rxqs_n; ++n) - flow->rxqs[n]->mark = 1; - } - } - return 0; -} - -/** - * Verify if the Rx queue is used in a flow. - * - * @param priv - * Pointer to private structure. - * @param rxq - * Pointer to the queue to search. - * - * @return - * Nonzero if the queue is used by a flow. 
- */ -int -priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq) -{ - struct rte_flow *flow; - - for (flow = TAILQ_FIRST(&priv->flows); - flow; - flow = TAILQ_NEXT(flow, next)) { - unsigned int n; - - if (flow->drop) - continue; - for (n = 0; n < flow->rxqs_n; ++n) { - if (flow->rxqs[n] == rxq) - return 1; + for (n = 0; n < flow->queues_n; ++n) + (*priv->rxqs)[flow->queues[n]]->mark = 1; } } return 0; diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 0d645ec6f3..89c2cdb785 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -378,7 +378,7 @@ priv_create_hash_rxqs(struct priv *priv) rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]], struct mlx5_rxq_ctrl, rxq); - wqs[i] = rxq_ctrl->wq; + wqs[i] = rxq_ctrl->ibv->wq; } /* Get number of hash RX queues to configure. */ for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i) @@ -645,8 +645,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n) /* Iterate on segments. */ for (i = 0; (i != elts_n); ++i) { struct rte_mbuf *buf; - volatile struct mlx5_wqe_data_seg *scat = - &(*rxq_ctrl->rxq.wqes)[i]; buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); if (buf == NULL) { @@ -667,21 +665,12 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n) DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); PKT_LEN(buf) = DATA_LEN(buf); NB_SEGS(buf) = 1; - /* scat->addr must be able to store a pointer. */ - assert(sizeof(scat->addr) >= sizeof(uintptr_t)); - *scat = (struct mlx5_wqe_data_seg){ - .addr = - rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)), - .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)), - .lkey = rxq_ctrl->mr->lkey, - }; (*rxq_ctrl->rxq.elts)[i] = buf; } if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; - assert(rxq->elts_n == rxq->cqe_n); /* Initialize default rearm_data for vPMD. 
*/ mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; rte_mbuf_refcnt_set(mbuf_init, 1); @@ -759,75 +748,11 @@ void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) { DEBUG("cleaning up %p", (void *)rxq_ctrl); - rxq_free_elts(rxq_ctrl); - if (rxq_ctrl->wq != NULL) - claim_zero(ibv_destroy_wq(rxq_ctrl->wq)); - if (rxq_ctrl->cq != NULL) - claim_zero(ibv_destroy_cq(rxq_ctrl->cq)); - if (rxq_ctrl->channel != NULL) - claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel)); - if (rxq_ctrl->mr != NULL) - priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr); + if (rxq_ctrl->ibv) + mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv); memset(rxq_ctrl, 0, sizeof(*rxq_ctrl)); } -/** - * Initialize RX queue. - * - * @param tmpl - * Pointer to RX queue control template. - * - * @return - * 0 on success, errno value on failure. - */ -static inline int -rxq_setup(struct mlx5_rxq_ctrl *tmpl) -{ - struct ibv_cq *ibcq = tmpl->cq; - struct mlx5dv_cq cq_info; - struct mlx5dv_rwq rwq; - const uint16_t desc_n = - (1 << tmpl->rxq.elts_n) + tmpl->priv->rx_vec_en * - MLX5_VPMD_DESCS_PER_LOOP; - struct rte_mbuf *(*elts)[desc_n] = - rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket); - struct mlx5dv_obj obj; - int ret = 0; - - obj.cq.in = ibcq; - obj.cq.out = &cq_info; - obj.rwq.in = tmpl->wq; - obj.rwq.out = &rwq; - ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); - if (ret != 0) { - return -EINVAL; - } - if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { - ERROR("Wrong MLX5_CQE_SIZE environment variable value: " - "it should be set to %u", RTE_CACHE_LINE_SIZE); - return EINVAL; - } - if (elts == NULL) - return ENOMEM; - tmpl->rxq.rq_db = rwq.dbrec; - tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt); - tmpl->rxq.cq_ci = 0; - tmpl->rxq.rq_ci = 0; - tmpl->rxq.rq_pi = 0; - tmpl->rxq.cq_db = cq_info.dbrec; - tmpl->rxq.wqes = - (volatile struct mlx5_wqe_data_seg (*)[]) - (uintptr_t)rwq.buf; - tmpl->rxq.cqes = - (volatile struct mlx5_cqe (*)[]) - (uintptr_t)cq_info.buf; - tmpl->rxq.elts = 
elts; - tmpl->rxq.cq_uar = cq_info.cq_uar; - tmpl->rxq.cqn = cq_info.cqn; - tmpl->rxq.cq_arm_sn = 0; - return 0; -} - /** * Configure a RX queue. * @@ -853,29 +778,28 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, const struct rte_eth_rxconf *conf, struct rte_mempool *mp) { struct priv *priv = dev->data->dev_private; + const uint16_t desc_n = + desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; struct mlx5_rxq_ctrl tmpl = { .priv = priv, .socket = socket, .rxq = { + .elts = rte_calloc_socket("RXQ", 1, + desc_n * + sizeof(struct rte_mbuf *), 0, + socket), .elts_n = log2above(desc), .mp = mp, .rss_hash = priv->rxqs_n > 1, }, }; - struct ibv_wq_attr mod; - union { - struct ibv_cq_init_attr_ex cq; - struct ibv_wq_init_attr wq; - struct ibv_cq_ex cq_attr; - } attr; unsigned int mb_len = rte_pktmbuf_data_room_size(mp); - unsigned int cqe_n = desc - 1; - const uint16_t desc_n = - desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; struct rte_mbuf *(*elts)[desc_n] = NULL; int ret = 0; (void)conf; /* Thresholds configuration (ignored). */ + if (dev->data->dev_conf.intr_conf.rxq) + tmpl.irq = 1; /* Enable scattered packets support for this queue if necessary. */ assert(mb_len >= RTE_PKTMBUF_HEADROOM); if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= @@ -928,77 +852,13 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, if (priv->hw_csum_l2tun) tmpl.rxq.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum; - /* Use the entire RX mempool as the memory region. 
*/ - tmpl.mr = priv_mr_get(priv, mp); - if (tmpl.mr == NULL) { - tmpl.mr = priv_mr_new(priv, mp); - if (tmpl.mr == NULL) { - ret = EINVAL; - ERROR("%p: MR creation failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - } - if (dev->data->dev_conf.intr_conf.rxq) { - tmpl.channel = ibv_create_comp_channel(priv->ctx); - if (tmpl.channel == NULL) { - ret = ENOMEM; - ERROR("%p: Rx interrupt completion channel creation" - " failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - } - attr.cq = (struct ibv_cq_init_attr_ex){ - .comp_mask = 0, - }; - if (priv->cqe_comp) { - attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS; - attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; - /* - * For vectorized Rx, it must not be doubled in order to - * make cq_ci and rq_ci aligned. - */ - if (rxq_check_vec_support(&tmpl.rxq) < 0) - cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */ - } - tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0); - if (tmpl.cq == NULL) { - ret = ENOMEM; - ERROR("%p: CQ creation failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - DEBUG("priv->device_attr.max_qp_wr is %d", - priv->device_attr.orig_attr.max_qp_wr); - DEBUG("priv->device_attr.max_sge is %d", - priv->device_attr.orig_attr.max_sge); /* Configure VLAN stripping. */ tmpl.rxq.vlan_strip = (priv->hw_vlan_strip && !!dev->data->dev_conf.rxmode.hw_vlan_strip); - attr.wq = (struct ibv_wq_init_attr){ - .wq_context = NULL, /* Could be useful in the future. */ - .wq_type = IBV_WQT_RQ, - /* Max number of outstanding WRs. */ - .max_wr = desc >> tmpl.rxq.sges_n, - /* Max number of scatter/gather elements in a WR. */ - .max_sge = 1 << tmpl.rxq.sges_n, - .pd = priv->pd, - .cq = tmpl.cq, - .comp_mask = - IBV_WQ_FLAGS_CVLAN_STRIPPING | - 0, - .create_flags = (tmpl.rxq.vlan_strip ? - IBV_WQ_FLAGS_CVLAN_STRIPPING : - 0), - }; /* By default, FCS (CRC) is stripped by hardware. 
*/ if (dev->data->dev_conf.rxmode.hw_strip_crc) { tmpl.rxq.crc_present = 0; } else if (priv->hw_fcs_strip) { - /* Ask HW/Verbs to leave CRC in place when supported. */ - attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; - attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; tmpl.rxq.crc_present = 1; } else { WARN("%p: CRC stripping has been disabled but will still" @@ -1013,60 +873,21 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, tmpl.rxq.crc_present ? "disabled" : "enabled", tmpl.rxq.crc_present << 2); #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING - if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) + if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) { ; /* Nothing else to do. */ - else if (priv->hw_padding) { + } else if (priv->hw_padding) { INFO("%p: enabling packet padding on queue %p", (void *)dev, (void *)rxq_ctrl); - attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; - attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; - } else + } else { WARN("%p: packet padding has been requested but is not" " supported, make sure MLNX_OFED and firmware are" " up to date", (void *)dev); -#endif - - tmpl.wq = ibv_create_wq(priv->ctx, &attr.wq); - if (tmpl.wq == NULL) { - ret = (errno ? errno : EINVAL); - ERROR("%p: WQ creation failure: %s", - (void *)dev, strerror(ret)); - goto error; - } - /* - * Make sure number of WRs*SGEs match expectations since a queue - * cannot allocate more than "desc" buffers. - */ - if (((int)attr.wq.max_wr != (desc >> tmpl.rxq.sges_n)) || - ((int)attr.wq.max_sge != (1 << tmpl.rxq.sges_n))) { - ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs", - (void *)dev, - (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n), - attr.wq.max_wr, attr.wq.max_sge); - ret = EINVAL; - goto error; } +#endif /* Save port ID. */ tmpl.rxq.port_id = dev->data->port_id; DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id); - /* Change queue state to ready. 
*/ - mod = (struct ibv_wq_attr){ - .attr_mask = IBV_WQ_ATTR_STATE, - .wq_state = IBV_WQS_RDY, - }; - ret = ibv_modify_wq(tmpl.wq, &mod); - if (ret) { - ERROR("%p: WQ state to IBV_WQS_RDY failed: %s", - (void *)dev, strerror(ret)); - goto error; - } - ret = rxq_setup(&tmpl); - if (ret) { - ERROR("%p: cannot initialize RX queue structure: %s", - (void *)dev, strerror(ret)); - goto error; - } ret = rxq_alloc_elts(&tmpl, desc); if (ret) { ERROR("%p: RXQ allocation failed: %s", @@ -1085,17 +906,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, rte_free(tmpl.rxq.elts); tmpl.rxq.elts = elts; *rxq_ctrl = tmpl; - /* Update doorbell counter. */ - rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n; - rte_wmb(); - *rxq_ctrl->rxq.rq_db = rte_cpu_to_be_32(rxq_ctrl->rxq.rq_ci); DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl); assert(ret == 0); return 0; error: - elts = tmpl.rxq.elts; + rte_free(tmpl.rxq.elts); mlx5_rxq_cleanup(&tmpl); - rte_free(elts); assert(ret > 0); return ret; } @@ -1185,14 +1001,20 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, } } ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp); - if (ret) + if (ret) { rte_free(rxq_ctrl); - else { - rxq_ctrl->rxq.stats.idx = idx; - DEBUG("%p: adding RX queue %p to list", - (void *)dev, (void *)rxq_ctrl); - (*priv->rxqs)[idx] = &rxq_ctrl->rxq; + goto out; + } + rxq_ctrl->rxq.stats.idx = idx; + DEBUG("%p: adding RX queue %p to list", + (void *)dev, (void *)rxq_ctrl); + (*priv->rxqs)[idx] = &rxq_ctrl->rxq; + rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx); + if (!rxq_ctrl->ibv) { + ret = EAGAIN; + goto out; } +out: priv_unlock(priv); return -ret; } @@ -1219,7 +1041,7 @@ mlx5_rx_queue_release(void *dpdk_rxq) rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); priv = rxq_ctrl->priv; priv_lock(priv); - if (priv_flow_rxq_in_use(priv, rxq)) + if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv)) rte_panic("Rx queue %p is still used by a 
flow and cannot be" " removed\n", (void *)rxq_ctrl); for (i = 0; (i != priv->rxqs_n); ++i) @@ -1264,15 +1086,14 @@ priv_rx_intr_vec_enable(struct priv *priv) } intr_handle->type = RTE_INTR_HANDLE_EXT; for (i = 0; i != n; ++i) { - struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; - struct mlx5_rxq_ctrl *rxq_ctrl = - container_of(rxq, struct mlx5_rxq_ctrl, rxq); + /* This rxq ibv must not be released in this function. */ + struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i); int fd; int flags; int rc; /* Skip queues that cannot request interrupts. */ - if (!rxq || !rxq_ctrl->channel) { + if (!rxq_ibv || !rxq_ibv->channel) { /* Use invalid intr_vec[] index to disable entry. */ intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + @@ -1286,7 +1107,7 @@ priv_rx_intr_vec_enable(struct priv *priv) priv_rx_intr_vec_disable(priv); return -1; } - fd = rxq_ctrl->channel->fd; + fd = rxq_ibv->channel->fd; flags = fcntl(fd, F_GETFL); rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); if (rc < 0) { @@ -1316,7 +1137,27 @@ void priv_rx_intr_vec_disable(struct priv *priv) { struct rte_intr_handle *intr_handle = priv->dev->intr_handle; + unsigned int i; + unsigned int rxqs_n = priv->rxqs_n; + unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + + if (!priv->dev->data->dev_conf.intr_conf.rxq) + return; + for (i = 0; i != n; ++i) { + struct mlx5_rxq_ctrl *rxq_ctrl; + struct mlx5_rxq_data *rxq_data; + if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + + RTE_MAX_RXTX_INTR_VEC_ID) + continue; + /** + * Need to access directly the queue to release the reference + * kept in priv_rx_intr_vec_enable(). 
+ */ + rxq_data = (*priv->rxqs)[i]; + rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); + mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv); + } rte_intr_free_epoll_fd(intr_handle); free(intr_handle->intr_vec); intr_handle->nb_efd = 0; @@ -1363,16 +1204,30 @@ int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { struct priv *priv = mlx5_get_priv(dev); - struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; - struct mlx5_rxq_ctrl *rxq_ctrl = - container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct mlx5_rxq_data *rxq_data; + struct mlx5_rxq_ctrl *rxq_ctrl; int ret = 0; - if (!rxq || !rxq_ctrl->channel) { + priv_lock(priv); + rxq_data = (*priv->rxqs)[rx_queue_id]; + if (!rxq_data) { ret = EINVAL; - } else { - mlx5_arm_cq(rxq, rxq->cq_arm_sn); + goto exit; + } + rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); + if (rxq_ctrl->irq) { + struct mlx5_rxq_ibv *rxq_ibv; + + rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id); + if (!rxq_ibv) { + ret = EINVAL; + goto exit; + } + mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); + mlx5_priv_rxq_ibv_release(priv, rxq_ibv); } +exit: + priv_unlock(priv); if (ret) WARN("unable to arm interrupt on rx queue %d", rx_queue_id); return -ret; @@ -1393,25 +1248,345 @@ int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { struct priv *priv = mlx5_get_priv(dev); - struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; - struct mlx5_rxq_ctrl *rxq_ctrl = - container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct mlx5_rxq_data *rxq_data; + struct mlx5_rxq_ctrl *rxq_ctrl; + struct mlx5_rxq_ibv *rxq_ibv = NULL; struct ibv_cq *ev_cq; void *ev_ctx; - int ret; + int ret = 0; - if (!rxq || !rxq_ctrl->channel) { + priv_lock(priv); + rxq_data = (*priv->rxqs)[rx_queue_id]; + if (!rxq_data) { ret = EINVAL; - } else { - ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx); - rxq->cq_arm_sn++; - if (ret || ev_cq != rxq_ctrl->cq) - ret = EINVAL; + goto exit; + } + rxq_ctrl = 
container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); + if (!rxq_ctrl->irq) + goto exit; + rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id); + if (!rxq_ibv) { + ret = EINVAL; + goto exit; + } + ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); + if (ret || ev_cq != rxq_ibv->cq) { + ret = EINVAL; + goto exit; } + rxq_data->cq_arm_sn++; + ibv_ack_cq_events(rxq_ibv->cq, 1); +exit: + if (rxq_ibv) + mlx5_priv_rxq_ibv_release(priv, rxq_ibv); + priv_unlock(priv); if (ret) WARN("unable to disable interrupt on rx queue %d", rx_queue_id); - else - ibv_ack_cq_events(rxq_ctrl->cq, 1); return -ret; } + +/** + * Create the Rx queue Verbs object. + * + * @param priv + * Pointer to private structure. + * @param idx + * Queue index in DPDK Rx queue array + * + * @return + * The Verbs object initialised if it can be created. + */ +struct mlx5_rxq_ibv* +mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) +{ + struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); + struct ibv_wq_attr mod; + union { + struct ibv_cq_init_attr_ex cq; + struct ibv_wq_init_attr wq; + struct ibv_cq_ex cq_attr; + } attr; + unsigned int cqe_n = (1 << rxq_data->elts_n) - 1; + struct mlx5_rxq_ibv *tmpl; + struct mlx5dv_cq cq_info; + struct mlx5dv_rwq rwq; + unsigned int i; + int ret = 0; + struct mlx5dv_obj obj; + + assert(rxq_data); + assert(!rxq_ctrl->ibv); + tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, + rxq_ctrl->socket); + if (!tmpl) { + ERROR("%p: cannot allocate verbs resources", + (void *)rxq_ctrl); + goto error; + } + tmpl->rxq_ctrl = rxq_ctrl; + /* Use the entire RX mempool as the memory region. 
*/ + tmpl->mr = priv_mr_get(priv, rxq_data->mp); + if (!tmpl->mr) { + tmpl->mr = priv_mr_new(priv, rxq_data->mp); + if (!tmpl->mr) { + ERROR("%p: MR creation failure", (void *)rxq_ctrl); + goto error; + } + } + if (rxq_ctrl->irq) { + tmpl->channel = ibv_create_comp_channel(priv->ctx); + if (!tmpl->channel) { + ERROR("%p: Comp Channel creation failure", + (void *)rxq_ctrl); + goto error; + } + } + attr.cq = (struct ibv_cq_init_attr_ex){ + .comp_mask = 0, + }; + if (priv->cqe_comp) { + attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS; + attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; + /* + * For vectorized Rx, it must not be doubled in order to + * make cq_ci and rq_ci aligned. + */ + if (rxq_check_vec_support(rxq_data) < 0) + cqe_n *= 2; + } + tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0); + if (tmpl->cq == NULL) { + ERROR("%p: CQ creation failure", (void *)rxq_ctrl); + goto error; + } + DEBUG("priv->device_attr.max_qp_wr is %d", + priv->device_attr.orig_attr.max_qp_wr); + DEBUG("priv->device_attr.max_sge is %d", + priv->device_attr.orig_attr.max_sge); + attr.wq = (struct ibv_wq_init_attr){ + .wq_context = NULL, /* Could be useful in the future. */ + .wq_type = IBV_WQT_RQ, + /* Max number of outstanding WRs. */ + .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n, + /* Max number of scatter/gather elements in a WR. */ + .max_sge = 1 << rxq_data->sges_n, + .pd = priv->pd, + .cq = tmpl->cq, + .comp_mask = + IBV_WQ_FLAGS_CVLAN_STRIPPING | + 0, + .create_flags = (rxq_data->vlan_strip ? + IBV_WQ_FLAGS_CVLAN_STRIPPING : + 0), + }; + /* By default, FCS (CRC) is stripped by hardware. 
*/ + if (rxq_data->crc_present) { + attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; + attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; + } +#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING + if (priv->hw_padding) { + attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; + attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; + } +#endif + tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq); + if (tmpl->wq == NULL) { + ERROR("%p: WQ creation failure", (void *)rxq_ctrl); + goto error; + } + /* + * Make sure number of WRs*SGEs match expectations since a queue + * cannot allocate more than "desc" buffers. + */ + if (((int)attr.wq.max_wr != + ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) || + ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) { + ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs", + (void *)rxq_ctrl, + ((1 << rxq_data->elts_n) >> rxq_data->sges_n), + (1 << rxq_data->sges_n), + attr.wq.max_wr, attr.wq.max_sge); + goto error; + } + /* Change queue state to ready. */ + mod = (struct ibv_wq_attr){ + .attr_mask = IBV_WQ_ATTR_STATE, + .wq_state = IBV_WQS_RDY, + }; + ret = ibv_modify_wq(tmpl->wq, &mod); + if (ret) { + ERROR("%p: WQ state to IBV_WQS_RDY failed", + (void *)rxq_ctrl); + goto error; + } + obj.cq.in = tmpl->cq; + obj.cq.out = &cq_info; + obj.rwq.in = tmpl->wq; + obj.rwq.out = &rwq; + ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); + if (ret != 0) + goto error; + if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { + ERROR("Wrong MLX5_CQE_SIZE environment variable value: " + "it should be set to %u", RTE_CACHE_LINE_SIZE); + goto error; + } + /* Fill the rings. */ + rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[]) + (uintptr_t)rwq.buf; + for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) { + struct rte_mbuf *buf = (*rxq_data->elts)[i]; + volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i]; + + /* scat->addr must be able to store a pointer. 
*/ + assert(sizeof(scat->addr) >= sizeof(uintptr_t)); + *scat = (struct mlx5_wqe_data_seg){ + .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, + uintptr_t)), + .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)), + .lkey = tmpl->mr->lkey, + }; + } + rxq_data->rq_db = rwq.dbrec; + rxq_data->cqe_n = log2above(cq_info.cqe_cnt); + rxq_data->cq_ci = 0; + rxq_data->rq_ci = 0; + rxq_data->rq_pi = 0; + rxq_data->zip = (struct rxq_zip){ + .ai = 0, + }; + rxq_data->cq_db = cq_info.dbrec; + rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; + /* Update doorbell counter. */ + rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n; + rte_wmb(); + *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci); + DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl); + rte_atomic32_inc(&tmpl->refcnt); + DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, + (void *)tmpl, rte_atomic32_read(&tmpl->refcnt)); + LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); + return tmpl; +error: + if (tmpl->wq) + claim_zero(ibv_destroy_wq(tmpl->wq)); + if (tmpl->cq) + claim_zero(ibv_destroy_cq(tmpl->cq)); + if (tmpl->channel) + claim_zero(ibv_destroy_comp_channel(tmpl->channel)); + if (tmpl->mr) + priv_mr_release(priv, tmpl->mr); + return NULL; +} + +/** + * Get an Rx queue Verbs object. + * + * @param priv + * Pointer to private structure. + * @param idx + * Queue index in DPDK Rx queue array + * + * @return + * The Verbs object if it exists. 
+ */ +struct mlx5_rxq_ibv* +mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx) +{ + struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; + struct mlx5_rxq_ctrl *rxq_ctrl; + + if (idx >= priv->rxqs_n) + return NULL; + if (!rxq_data) + return NULL; + rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); + if (rxq_ctrl->ibv) { + priv_mr_get(priv, rxq_data->mp); + rte_atomic32_inc(&rxq_ctrl->ibv->refcnt); + DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, + (void *)rxq_ctrl->ibv, + rte_atomic32_read(&rxq_ctrl->ibv->refcnt)); + } + return rxq_ctrl->ibv; +} + +/** + * Release an Rx verbs queue object. + * + * @param priv + * Pointer to private structure. + * @param rxq_ibv + * Verbs Rx queue object. + * + * @return + * 0 on success, errno value on failure. + */ +int +mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) +{ + int ret; + + assert(rxq_ibv); + assert(rxq_ibv->wq); + assert(rxq_ibv->cq); + assert(rxq_ibv->mr); + ret = priv_mr_release(priv, rxq_ibv->mr); + if (!ret) + rxq_ibv->mr = NULL; + DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, + (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt)); + if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) { + rxq_free_elts(rxq_ibv->rxq_ctrl); + claim_zero(ibv_destroy_wq(rxq_ibv->wq)); + claim_zero(ibv_destroy_cq(rxq_ibv->cq)); + if (rxq_ibv->channel) + claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel)); + LIST_REMOVE(rxq_ibv, next); + rte_free(rxq_ibv); + return 0; + } + return EBUSY; +} + +/** + * Verify the Verbs Rx queue list is empty + * + * @param priv + * Pointer to private structure. + * + * @return the number of object not released. + */ +int +mlx5_priv_rxq_ibv_verify(struct priv *priv) +{ + int ret = 0; + struct mlx5_rxq_ibv *rxq_ibv; + + LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) { + DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv, + (void *)rxq_ibv); + ++ret; + } + return ret; +} + +/** + * Return true if a single reference exists on the object. 
+ * + * @param priv + * Pointer to private structure. + * @param rxq_ibv + * Verbs Rx queue object. + */ +int +mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) +{ + (void)priv; + assert(rxq_ibv); + return (rte_atomic32_read(&rxq_ibv->refcnt) == 1); +} diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 89e60ea361..812fbb1853 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -134,15 +134,24 @@ struct mlx5_rxq_data { uint8_t cq_arm_sn; /* CQ arm seq number. */ } __rte_cache_aligned; -/* RX queue control descriptor. */ -struct mlx5_rxq_ctrl { - struct priv *priv; /* Back pointer to private data. */ +/* Verbs Rx queue elements. */ +struct mlx5_rxq_ibv { + LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */ + rte_atomic32_t refcnt; /* Reference counter. */ + struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */ struct ibv_cq *cq; /* Completion Queue. */ struct ibv_wq *wq; /* Work Queue. */ - struct mlx5_mr *mr; /* Memory Region (for mp). */ struct ibv_comp_channel *channel; - unsigned int socket; /* CPU socket ID for allocations. */ + struct mlx5_mr *mr; /* Memory Region (for mp). */ +}; + +/* RX queue control descriptor. */ +struct mlx5_rxq_ctrl { + struct priv *priv; /* Back pointer to private data. */ + struct mlx5_rxq_ibv *ibv; /* Verbs elements. */ struct mlx5_rxq_data rxq; /* Data path structure. */ + unsigned int socket; /* CPU socket ID for allocations. */ + unsigned int irq:1; /* Whether IRQ is enabled. */ }; /* Hash RX queue types. 
*/ @@ -310,6 +319,11 @@ int priv_rx_intr_vec_enable(struct priv *priv); void priv_rx_intr_vec_disable(struct priv *priv); int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id); int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id); +struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t); +struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t); +int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *); +int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *); +int mlx5_priv_rxq_ibv_verify(struct priv *); /* mlx5_txq.c */ @@ -347,7 +361,6 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t); /* mlx5_mr.c */ -struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *); void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *); struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *, unsigned int); diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c index 0d91591d55..d707984a56 100644 --- a/drivers/net/mlx5/mlx5_vlan.c +++ b/drivers/net/mlx5/mlx5_vlan.c @@ -154,7 +154,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on) .flags = vlan_offloads, }; - err = ibv_modify_wq(rxq_ctrl->wq, &mod); + err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod); if (err) { ERROR("%p: failed to modified stripping mode: %s", (void *)priv, strerror(err));