From 2d77cb615b2c126828c4e98b686523bf0139c1c1 Mon Sep 17 00:00:00 2001 From: Matan Azrad Date: Thu, 30 May 2019 10:20:38 +0000 Subject: [PATCH] net/mlx5: recover secondary process Rx errors The RQ errors recovery mechanism in the PMD invokes Verbs functions to modify the RQ states in order to reset the RQ and to reactivate it. These Verbs functions are not allowed to be invoked from a secondary process, hence the PMD skips the recovery when the error is captured by secondary process queues. Using the DPDK IPC mechanism the secondary process can request Verbs queues state modifications to be done synchronously by the primary process. Add support for secondary process Rx errors recovery. Cc: stable@dpdk.org Signed-off-by: Matan Azrad Acked-by: Shahaf Shuler --- drivers/net/mlx5/mlx5.h | 11 ++++ drivers/net/mlx5/mlx5_mp.c | 46 ++++++++++++++++ drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++++++------ drivers/net/mlx5/mlx5_rxtx.h | 3 + drivers/net/mlx5/mlx5_trigger.c | 1 + 5 files changed, 141 insertions(+), 18 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 7f5cb8f68d..f779b4610b 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -61,6 +61,13 @@ enum mlx5_mp_req_type { MLX5_MP_REQ_CREATE_MR, MLX5_MP_REQ_START_RXTX, MLX5_MP_REQ_STOP_RXTX, + MLX5_MP_REQ_QUEUE_STATE_MODIFY, +}; + +struct mlx5_mp_arg_queue_state_modify { + uint8_t is_wq; /* Set if WQ. */ + uint16_t queue_id; /* DPDK queue ID. */ + enum ibv_wq_state state; /* WQ requested state. */ }; /* Pameters for IPC. 
*/ @@ -71,6 +78,8 @@ struct mlx5_mp_param { RTE_STD_C11 union { uintptr_t addr; /* MLX5_MP_REQ_CREATE_MR */ + struct mlx5_mp_arg_queue_state_modify state_modify; + /* MLX5_MP_REQ_QUEUE_STATE_MODIFY */ } args; }; @@ -546,6 +555,8 @@ void mlx5_mp_req_start_rxtx(struct rte_eth_dev *dev); void mlx5_mp_req_stop_rxtx(struct rte_eth_dev *dev); int mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr); int mlx5_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev); +int mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev, + struct mlx5_mp_arg_queue_state_modify *sm); int mlx5_mp_init_primary(void); void mlx5_mp_uninit_primary(void); int mlx5_mp_init_secondary(void); diff --git a/drivers/net/mlx5/mlx5_mp.c b/drivers/net/mlx5/mlx5_mp.c index 37f26cc71c..2a031e2610 100644 --- a/drivers/net/mlx5/mlx5_mp.c +++ b/drivers/net/mlx5/mlx5_mp.c @@ -85,6 +85,12 @@ mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer) res->result = 0; ret = rte_mp_reply(&mp_res, peer); break; + case MLX5_MP_REQ_QUEUE_STATE_MODIFY: + mp_init_msg(dev, &mp_res, param->type); + res->result = mlx5_queue_state_modify_primary + (dev, &param->args.state_modify); + ret = rte_mp_reply(&mp_res, peer); + break; default: rte_errno = EINVAL; DRV_LOG(ERR, "port %u invalid mp request type", @@ -271,6 +277,46 @@ mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr) return ret; } +/** + * Request Verbs queue state modification to the primary process. + * + * @param[in] dev + * Pointer to Ethernet structure. + * @param sm + * State modify parameters. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev, + struct mlx5_mp_arg_queue_state_modify *sm) +{ + struct rte_mp_msg mp_req; + struct rte_mp_msg *mp_res; + struct rte_mp_reply mp_rep; + struct mlx5_mp_param *req = (struct mlx5_mp_param *)mp_req.param; + struct mlx5_mp_param *res; + struct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0}; + int ret; + + assert(rte_eal_process_type() == RTE_PROC_SECONDARY); + mp_init_msg(dev, &mp_req, MLX5_MP_REQ_QUEUE_STATE_MODIFY); + req->args.state_modify = *sm; + ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); + if (ret) { + DRV_LOG(ERR, "port %u request to primary process failed", + dev->data->port_id); + return -rte_errno; + } + assert(mp_rep.nb_received == 1); + mp_res = &mp_rep.msgs[0]; + res = (struct mlx5_mp_param *)mp_res->param; + ret = res->result; + free(mp_rep.msgs); + return ret; +} + /** * Request Verbs command file descriptor for mmap to the primary process. * diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 875ff227ca..3857c1668c 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -2030,6 +2030,75 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); } +/** + * Modify a Verbs queue state. + * This must be called from the primary process. + * + * @param dev + * Pointer to Ethernet device. + * @param sm + * State modify request parameters. + * + * @return + * 0 in case of success else non-zero value and rte_errno is set. 
+ */ +int +mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, + const struct mlx5_mp_arg_queue_state_modify *sm) +{ + int ret; + struct mlx5_priv *priv = dev->data->dev_private; + + if (sm->is_wq) { + struct ibv_wq_attr mod = { + .attr_mask = IBV_WQ_ATTR_STATE, + .wq_state = sm->state, + }; + struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + + ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); + if (ret) { + DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n", + sm->state, strerror(errno)); + rte_errno = errno; + return ret; + } + } + return 0; +} + +/** + * Modify a Verbs queue state. + * + * @param dev + * Pointer to Ethernet device. + * @param sm + * State modify request parameters. + * + * @return + * 0 in case of success else non-zero value. + */ +static int +mlx5_queue_state_modify(struct rte_eth_dev *dev, + struct mlx5_mp_arg_queue_state_modify *sm) +{ + int ret = 0; + + switch (rte_eal_process_type()) { + case RTE_PROC_PRIMARY: + ret = mlx5_queue_state_modify_primary(dev, sm); + break; + case RTE_PROC_SECONDARY: + ret = mlx5_mp_req_queue_state_modify(dev, sm); + break; + default: + break; + } + return ret; +} + /** * Handle a Rx error. 
* The function inserts the RQ state to reset when the first error CQE is @@ -2053,15 +2122,13 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) const unsigned int wqe_n = 1 << rxq->elts_n; struct mlx5_rxq_ctrl *rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); - struct ibv_wq_attr mod = { - .attr_mask = IBV_WQ_ATTR_STATE, - }; union { volatile struct mlx5_cqe *cqe; volatile struct mlx5_err_cqe *err_cqe; } u = { .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], }; + struct mlx5_mp_arg_queue_state_modify sm; int ret; switch (rxq->err_state) { @@ -2069,21 +2136,17 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; /* Fall-through */ case MLX5_RXQ_ERR_STATE_NEED_RESET: - if (rte_eal_process_type() != RTE_PROC_PRIMARY) + sm.is_wq = 1; + sm.queue_id = rxq->idx; + sm.state = IBV_WQS_RESET; + if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) return -1; - mod.wq_state = IBV_WQS_RESET; - ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); - if (ret) { - DRV_LOG(ERR, "Cannot change Rx WQ state to RESET %s\n", - strerror(errno)); - return -1; - } if (rxq_ctrl->dump_file_n < rxq_ctrl->priv->config.max_dump_files_num) { MKSTR(err_str, "Unexpected CQE error syndrome " "0x%02x CQN = %u RQN = %u wqe_counter = %u" " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, - rxq->cqn, rxq_ctrl->ibv->wq->wq_num, + rxq->cqn, rxq_ctrl->wqn, rte_be_to_cpu_16(u.err_cqe->wqe_counter), rxq->rq_ci << rxq->sges_n, rxq->cq_ci); MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", @@ -2113,13 +2176,12 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) */ *rxq->rq_db = rte_cpu_to_be_32(0); rte_cio_wmb(); - mod.wq_state = IBV_WQS_RDY; - ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); - if (ret) { - DRV_LOG(ERR, "Cannot change Rx WQ state to RDY" - " %s\n", strerror(errno)); + sm.is_wq = 1; + sm.queue_id = rxq->idx; + sm.state = IBV_WQS_RDY; + if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 
+ &sm)) return -1; - } if (mbuf_prepare) { const uint16_t q_mask = wqe_n - 1; uint16_t elt_idx; diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 81c3c5feb9..3d79c18b60 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -161,6 +161,7 @@ struct mlx5_rxq_ctrl { unsigned int irq:1; /* Whether IRQ is enabled. */ uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */ uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */ + uint32_t wqn; /* WQ number. */ uint16_t dump_file_n; /* Number of dump files. */ }; @@ -355,6 +356,8 @@ int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id); void mlx5_dump_debug_information(const char *path, const char *title, const void *buf, unsigned int len); +int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, + const struct mlx5_mp_arg_queue_state_modify *sm); /* Vectorized version of mlx5_rxtx.c */ int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev); diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index d5ce74d8c1..864c985d8b 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -126,6 +126,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev) rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i); if (!rxq_ctrl->ibv) goto error; + rxq_ctrl->wqn = rxq_ctrl->ibv->wq->wq_num; } return 0; error: -- 2.20.1