]> git.droids-corp.org - dpdk.git/commitdiff
net/mlx5: recover secondary process Rx errors
authorMatan Azrad <matan@mellanox.com>
Thu, 30 May 2019 10:20:38 +0000 (10:20 +0000)
committerFerruh Yigit <ferruh.yigit@intel.com>
Thu, 13 Jun 2019 15:01:06 +0000 (00:01 +0900)
The RQ error recovery mechanism in the PMD invokes Verbs functions to
modify the RQ state in order to reset the RQ and to reactivate it.

These Verbs functions are not allowed to be invoked from a secondary
process, hence the PMD skips the recovery when the error is captured by
a secondary process queue.

Using the DPDK IPC mechanism, the secondary process can request Verbs
queue state modifications to be done synchronously by the primary
process.

Add support for Rx error recovery in secondary processes.

Cc: stable@dpdk.org
Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_mp.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h
drivers/net/mlx5/mlx5_trigger.c

index 7f5cb8f68db0e307a0b7be55d6a799be6dce0e8a..f779b4610b7c5a64b925cb53c20d853451718bab 100644 (file)
@@ -61,6 +61,13 @@ enum mlx5_mp_req_type {
        MLX5_MP_REQ_CREATE_MR,
        MLX5_MP_REQ_START_RXTX,
        MLX5_MP_REQ_STOP_RXTX,
+       MLX5_MP_REQ_QUEUE_STATE_MODIFY,
+};
+
+struct mlx5_mp_arg_queue_state_modify {
+       uint8_t is_wq; /* Set if WQ. */
+       uint16_t queue_id; /* DPDK queue ID. */
+       enum ibv_wq_state state; /* WQ requested state. */
 };
 
 /* Pameters for IPC. */
@@ -71,6 +78,8 @@ struct mlx5_mp_param {
        RTE_STD_C11
        union {
                uintptr_t addr; /* MLX5_MP_REQ_CREATE_MR */
+               struct mlx5_mp_arg_queue_state_modify state_modify;
+               /* MLX5_MP_REQ_QUEUE_STATE_MODIFY */
        } args;
 };
 
@@ -546,6 +555,8 @@ void mlx5_mp_req_start_rxtx(struct rte_eth_dev *dev);
 void mlx5_mp_req_stop_rxtx(struct rte_eth_dev *dev);
 int mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr);
 int mlx5_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+int mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev,
+                                  struct mlx5_mp_arg_queue_state_modify *sm);
 int mlx5_mp_init_primary(void);
 void mlx5_mp_uninit_primary(void);
 int mlx5_mp_init_secondary(void);
index 37f26cc71cae6c1ccda5ebf841771ba24d915af0..2a031e26101b564ea312be3f10506722610fcae4 100644 (file)
@@ -85,6 +85,12 @@ mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
                res->result = 0;
                ret = rte_mp_reply(&mp_res, peer);
                break;
+       case MLX5_MP_REQ_QUEUE_STATE_MODIFY:
+               mp_init_msg(dev, &mp_res, param->type);
+               res->result = mlx5_queue_state_modify_primary
+                                       (dev, &param->args.state_modify);
+               ret = rte_mp_reply(&mp_res, peer);
+               break;
        default:
                rte_errno = EINVAL;
                DRV_LOG(ERR, "port %u invalid mp request type",
@@ -271,6 +277,46 @@ mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr)
        return ret;
 }
 
+/**
+ * Request Verbs queue state modification to the primary process.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet structure.
+ * @param sm
+ *   State modify parameters.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev,
+                              struct mlx5_mp_arg_queue_state_modify *sm)
+{
+       struct rte_mp_msg mp_req;
+       struct rte_mp_msg *mp_res;
+       struct rte_mp_reply mp_rep;
+       struct mlx5_mp_param *req = (struct mlx5_mp_param *)mp_req.param;
+       struct mlx5_mp_param *res;
+       struct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+       int ret;
+
+       assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+       mp_init_msg(dev, &mp_req, MLX5_MP_REQ_QUEUE_STATE_MODIFY);
+       req->args.state_modify = *sm;
+       ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+       if (ret) {
+               DRV_LOG(ERR, "port %u request to primary process failed",
+                       dev->data->port_id);
+               return -rte_errno;
+       }
+       assert(mp_rep.nb_received == 1);
+       mp_res = &mp_rep.msgs[0];
+       res = (struct mlx5_mp_param *)mp_res->param;
+       ret = res->result;
+       free(mp_rep.msgs);
+       return ret;
+}
+
 /**
  * Request Verbs command file descriptor for mmap to the primary process.
  *
index 875ff227cadae03ff2d9b80303b1c4884094eaa9..3857c1668cd3ccd66ed7a2eb94ac3c4e5b6ca827 100644 (file)
@@ -2030,6 +2030,75 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
        *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
+/**
+ * Modify a Verbs queue state.
+ * This must be called from the primary process.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param sm
+ *   State modify request parameters.
+ *
+ * @return
+ *   0 in case of success else non-zero value and rte_errno is set.
+ */
+int
+mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
+                       const struct mlx5_mp_arg_queue_state_modify *sm)
+{
+       int ret;
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       if (sm->is_wq) {
+               struct ibv_wq_attr mod = {
+                       .attr_mask = IBV_WQ_ATTR_STATE,
+                       .wq_state = sm->state,
+               };
+               struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id];
+               struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+
+               ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
+               if (ret) {
+                       DRV_LOG(ERR, "Cannot change Rx WQ state to %u  - %s\n",
+                                       sm->state, strerror(errno));
+                       rte_errno = errno;
+                       return ret;
+               }
+       }
+       return 0;
+}
+
+/**
+ * Modify a Verbs queue state.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param sm
+ *   State modify request parameters.
+ *
+ * @return
+ *   0 in case of success else non-zero value.
+ */
+static int
+mlx5_queue_state_modify(struct rte_eth_dev *dev,
+                       struct mlx5_mp_arg_queue_state_modify *sm)
+{
+       int ret = 0;
+
+       switch (rte_eal_process_type()) {
+       case RTE_PROC_PRIMARY:
+               ret = mlx5_queue_state_modify_primary(dev, sm);
+               break;
+       case RTE_PROC_SECONDARY:
+               ret = mlx5_mp_req_queue_state_modify(dev, sm);
+               break;
+       default:
+               break;
+       }
+       return ret;
+}
+
 /**
  * Handle a Rx error.
  * The function inserts the RQ state to reset when the first error CQE is
@@ -2053,15 +2122,13 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
        const unsigned int wqe_n = 1 << rxq->elts_n;
        struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-       struct ibv_wq_attr mod = {
-               .attr_mask = IBV_WQ_ATTR_STATE,
-       };
        union {
                volatile struct mlx5_cqe *cqe;
                volatile struct mlx5_err_cqe *err_cqe;
        } u = {
                .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
        };
+       struct mlx5_mp_arg_queue_state_modify sm;
        int ret;
 
        switch (rxq->err_state) {
@@ -2069,21 +2136,17 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
                rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
                /* Fall-through */
        case MLX5_RXQ_ERR_STATE_NEED_RESET:
-               if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               sm.is_wq = 1;
+               sm.queue_id = rxq->idx;
+               sm.state = IBV_WQS_RESET;
+               if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
                        return -1;
-               mod.wq_state = IBV_WQS_RESET;
-               ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
-               if (ret) {
-                       DRV_LOG(ERR, "Cannot change Rx WQ state to RESET %s\n",
-                               strerror(errno));
-                       return -1;
-               }
                if (rxq_ctrl->dump_file_n <
                    rxq_ctrl->priv->config.max_dump_files_num) {
                        MKSTR(err_str, "Unexpected CQE error syndrome "
                              "0x%02x CQN = %u RQN = %u wqe_counter = %u"
                              " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
-                             rxq->cqn, rxq_ctrl->ibv->wq->wq_num,
+                             rxq->cqn, rxq_ctrl->wqn,
                              rte_be_to_cpu_16(u.err_cqe->wqe_counter),
                              rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
                        MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
@@ -2113,13 +2176,12 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
                         */
                        *rxq->rq_db = rte_cpu_to_be_32(0);
                        rte_cio_wmb();
-                       mod.wq_state = IBV_WQS_RDY;
-                       ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
-                       if (ret) {
-                               DRV_LOG(ERR, "Cannot change Rx WQ state to RDY"
-                                       " %s\n", strerror(errno));
+                       sm.is_wq = 1;
+                       sm.queue_id = rxq->idx;
+                       sm.state = IBV_WQS_RDY;
+                       if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
+                                                   &sm))
                                return -1;
-                       }
                        if (mbuf_prepare) {
                                const uint16_t q_mask = wqe_n - 1;
                                uint16_t elt_idx;
index 81c3c5feb96e7c35f2f6a14ff134383f18db9eec..3d79c18b606d097bf62da228dc826619b1371c37 100644 (file)
@@ -161,6 +161,7 @@ struct mlx5_rxq_ctrl {
        unsigned int irq:1; /* Whether IRQ is enabled. */
        uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
        uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
+       uint32_t wqn; /* WQ number. */
        uint16_t dump_file_n; /* Number of dump files. */
 };
 
@@ -355,6 +356,8 @@ int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
 uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 void mlx5_dump_debug_information(const char *path, const char *title,
                                 const void *buf, unsigned int len);
+int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
+                       const struct mlx5_mp_arg_queue_state_modify *sm);
 
 /* Vectorized version of mlx5_rxtx.c */
 int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);
index d5ce74d8c12e070cd19fc0c7fb5486ee6cc4aeea..864c985d8b461056f978184c1d6691fc9321c243 100644 (file)
@@ -126,6 +126,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
                rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i);
                if (!rxq_ctrl->ibv)
                        goto error;
+               rxq_ctrl->wqn = rxq_ctrl->ibv->wq->wq_num;
        }
        return 0;
 error: