diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 5a2d4fb1ec..404e135d5c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -3,10 +3,12 @@
  */
 #include <unistd.h>
 #include <stdint.h>
+#include <sched.h>
 #include <fcntl.h>
 #include <sys/eventfd.h>
 
 #include <rte_malloc.h>
+#include <rte_memory.h>
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_atomic.h>
@@ -15,11 +17,16 @@
 #include <rte_alarm.h>
 
 #include <mlx5_common.h>
+#include <mlx5_common_os.h>
+#include <mlx5_common_devx.h>
+#include <mlx5_glue.h>
 
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
+#define MLX5_VDPA_ERROR_TIME_SEC 3u
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -40,25 +47,21 @@ mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 			    sizeof(out.buf)) >=
 		       (ssize_t)sizeof(out.event_resp.cookie))
 			;
-		mlx5_glue->devx_destroy_event_channel(priv->eventc);
+		mlx5_os_devx_destroy_event_channel(priv->eventc);
 		priv->eventc = NULL;
 	}
 #endif
-	priv->eqn = 0;
 }
 
 /* Prepare all the global resources for all the event objects.*/
 static int
 mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
 {
+	int flags, ret;
+
 	if (priv->eventc)
 		return 0;
-	if (mlx5_glue->devx_query_eqn(priv->ctx, 0, &priv->eqn)) {
-		rte_errno = errno;
-		DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno);
-		return -1;
-	}
-	priv->eventc = mlx5_glue->devx_create_event_channel(priv->ctx,
+	priv->eventc = mlx5_os_devx_create_event_channel(priv->ctx,
 			   MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
 	if (!priv->eventc) {
 		rte_errno = errno;
@@ -66,7 +69,18 @@ mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
 			rte_errno);
 		goto error;
 	}
-	priv->uar = mlx5_glue->devx_alloc_uar(priv->ctx, 0);
+	flags = fcntl(priv->eventc->fd, F_GETFL);
+	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to change event channel FD.");
+		goto error;
+	}
+	/*
+	 * This PMD always claims the write memory barrier on UAR
+	 * registers writings, it is safe to allocate UAR with any
+	 * memory mapping type.
+	 */
+	priv->uar = mlx5_devx_alloc_uar(priv->ctx, -1);
 	if (!priv->uar) {
 		rte_errno = errno;
 		DRV_LOG(ERR, "Failed to allocate UAR.");
		goto error;
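The hunk above puts the DevX completion-event channel into non-blocking mode before allocating the UAR, so the interrupt handler can later drain every pending event without stalling. The F_GETFL/F_SETFL idiom is plain POSIX; a minimal standalone sketch, using an eventfd as a stand-in for the channel fd:

#include <fcntl.h>
#include <stdio.h>
#include <sys/eventfd.h>

/* Set O_NONBLOCK on an fd while preserving its existing flags. */
static int set_nonblock(int fd)
{
	int flags = fcntl(fd, F_GETFL);

	if (flags < 0)
		return -1;
	return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}

int main(void)
{
	int fd = eventfd(0, 0);

	if (fd < 0 || set_nonblock(fd)) {
		perror("eventfd/fcntl");
		return 1;
	}
	/* Reads now return -1/EAGAIN instead of blocking when empty. */
	printf("fd %d is non-blocking\n", fd);
	return 0;
}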
@@ -81,12 +95,7 @@
 static void
 mlx5_vdpa_cq_destroy(struct mlx5_vdpa_cq *cq)
 {
-	if (cq->cq)
-		claim_zero(mlx5_devx_cmd_destroy(cq->cq));
-	if (cq->umem_obj)
-		claim_zero(mlx5_glue->devx_umem_dereg(cq->umem_obj));
-	if (cq->umem_buf)
-		rte_free((void *)(uintptr_t)cq->umem_buf);
+	mlx5_devx_cq_destroy(&cq->cq_obj);
 	memset(cq, 0, sizeof(*cq));
 }
 
@@ -96,12 +105,12 @@ mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
 	uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
 	uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK;
 	uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
-	uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq->id;
+	uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq_obj.cq->id;
 	uint64_t db_be = rte_cpu_to_be_64(doorbell);
 	uint32_t *addr = RTE_PTR_ADD(priv->uar->base_addr, MLX5_CQ_DOORBELL);
 
 	rte_io_wmb();
-	cq->db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
+	cq->cq_obj.db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
 	rte_wmb();
 #ifdef RTE_ARCH_64
 	*(uint64_t *)addr = db_be;
@@ -118,52 +127,25 @@ static int
 mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
 		    int callfd, struct mlx5_vdpa_cq *cq)
 {
-	struct mlx5_devx_cq_attr attr = {0};
-	size_t pgsize = sysconf(_SC_PAGESIZE);
-	uint32_t umem_size;
+	struct mlx5_devx_cq_attr attr = {
+		.use_first_only = 1,
+		.uar_page_id = priv->uar->page_id,
+	};
 	uint16_t event_nums[1] = {0};
-	uint16_t cq_size = 1 << log_desc_n;
 	int ret;
 
-	cq->log_desc_n = log_desc_n;
-	umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
-	cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
-	if (!cq->umem_buf) {
-		DRV_LOG(ERR, "Failed to allocate memory for CQ.");
-		rte_errno = ENOMEM;
-		return -ENOMEM;
-	}
-	cq->umem_obj = mlx5_glue->devx_umem_reg(priv->ctx,
-						(void *)(uintptr_t)cq->umem_buf,
-						umem_size,
-						IBV_ACCESS_LOCAL_WRITE);
-	if (!cq->umem_obj) {
-		DRV_LOG(ERR, "Failed to register umem for CQ.");
-		goto error;
-	}
-	attr.q_umem_valid = 1;
-	attr.db_umem_valid = 1;
-	attr.use_first_only = 1;
-	attr.overrun_ignore = 0;
-	attr.uar_page_id = priv->uar->page_id;
-	attr.q_umem_id = cq->umem_obj->umem_id;
-	attr.q_umem_offset = 0;
-	attr.db_umem_id = cq->umem_obj->umem_id;
-	attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
-	attr.eqn = priv->eqn;
-	attr.log_cq_size = log_desc_n;
-	attr.log_page_size = rte_log2_u32(pgsize);
-	cq->cq = mlx5_devx_cmd_create_cq(priv->ctx, &attr);
-	if (!cq->cq)
+	ret = mlx5_devx_cq_create(priv->ctx, &cq->cq_obj, log_desc_n, &attr,
+				  SOCKET_ID_ANY);
+	if (ret)
 		goto error;
-	cq->db_rec = RTE_PTR_ADD(cq->umem_buf, (uintptr_t)attr.db_umem_offset);
 	cq->cq_ci = 0;
+	cq->log_desc_n = log_desc_n;
 	rte_spinlock_init(&cq->sl);
 	/* Subscribe CQ event to the event channel controlled by the driver. */
-	ret = mlx5_glue->devx_subscribe_devx_event(priv->eventc, cq->cq->obj,
-						   sizeof(event_nums),
-						   event_nums,
-						   (uint64_t)(uintptr_t)cq);
+	ret = mlx5_os_devx_subscribe_devx_event(priv->eventc,
+						cq->cq_obj.cq->obj,
+						sizeof(event_nums), event_nums,
+						(uint64_t)(uintptr_t)cq);
 	if (ret) {
 		DRV_LOG(ERR, "Failed to subscribe CQE event.");
 		rte_errno = errno;
@@ -171,8 +153,8 @@ mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
 	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
-	cq->cqes[0].op_own = MLX5_CQE_OWNER_MASK;
-	cq->cqes[0].wqe_counter = rte_cpu_to_be_16(cq_size - 1);
+	cq->cq_obj.cqes[0].op_own = MLX5_CQE_OWNER_MASK;
+	cq->cq_obj.cqes[0].wqe_counter = rte_cpu_to_be_16(UINT16_MAX);
 	/* First arming. */
 	mlx5_vdpa_cq_arm(priv, cq);
 	return 0;
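mlx5_vdpa_cq_arm() composes a 64-bit doorbell from the arm sequence number, the consumer index, and the CQ number, writes the doorbell record, and only then writes the UAR register, with barriers between the two stores. A sketch of just the packing arithmetic; the shift and command constants here are simplified stand-ins for the mlx5_prm.h values, not the real ones:

#include <stdint.h>
#include <stdio.h>

#define CQ_SQN_OFFSET	28	   /* assumed stand-in for MLX5_CQ_SQN_OFFSET */
#define CQ_DBR_CMD_ALL	(0 << 24)  /* assumed stand-in for MLX5_CQ_DBR_CMD_ALL */
#define CI_MASK		0xffffff   /* 24-bit consumer index */

/* Build the 64-bit arm doorbell value: { arm_sn | cmd | ci, cq_number }. */
static uint64_t cq_arm_doorbell(uint32_t arm_sn, uint32_t cq_ci, uint32_t cqn)
{
	uint32_t doorbell_hi = (arm_sn << CQ_SQN_OFFSET) | CQ_DBR_CMD_ALL |
			       (cq_ci & CI_MASK);

	return ((uint64_t)doorbell_hi << 32) | cqn;
}

int main(void)
{
	/* Example: sequence number 1, consumer index 5, CQ number 0x42. */
	printf("doorbell = 0x%016llx\n",
	       (unsigned long long)cq_arm_doorbell(1, 5, 0x42));
	return 0;
}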
@@ -187,7 +169,6 @@ mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
 	struct mlx5_vdpa_event_qp *eqp =
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
-	const unsigned int cq_mask = cq_size - 1;
 	union {
 		struct {
 			uint16_t wqe_counter;
@@ -196,17 +177,15 @@ mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
 		};
 		uint32_t word;
 	} last_word;
-	uint16_t next_wqe_counter = cq->cq_ci & cq_mask;
+	uint16_t next_wqe_counter = cq->cq_ci;
 	uint16_t cur_wqe_counter;
 	uint16_t comp;
 
-	last_word.word = rte_read32(&cq->cqes[0].wqe_counter);
+	last_word.word = rte_read32(&cq->cq_obj.cqes[0].wqe_counter);
 	cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
-	comp = (cur_wqe_counter + 1u - next_wqe_counter) & cq_mask;
+	comp = cur_wqe_counter + (uint16_t)1 - next_wqe_counter;
 	if (comp) {
 		cq->cq_ci += comp;
-		MLX5_ASSERT(!!(cq->cq_ci & cq_size) ==
-			    MLX5_CQE_OWNER(last_word.op_own));
 		MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
 			    MLX5_CQE_INVALID);
 		if (unlikely(!(MLX5_CQE_OPCODE(last_word.op_own) ==
@@ -216,7 +195,7 @@ mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
 			cq->errors++;
 		rte_io_wmb();
 		/* Ring CQ doorbell record. */
-		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 		rte_io_wmb();
 		/* Ring SW QP doorbell record. */
 		eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
@@ -232,7 +211,7 @@ mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
 
 	for (i = 0; i < priv->nr_virtqs; i++) {
 		cq = &priv->virtqs[i].eqp.cq;
-		if (cq->cq && !cq->armed)
+		if (cq->cq_obj.cq && !cq->armed)
 			mlx5_vdpa_cq_arm(priv, cq);
 	}
 }
@@ -252,7 +231,11 @@ mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
 			break;
 		}
 	}
-	usleep(priv->timer_delay_us);
+	if (priv->timer_delay_us)
+		usleep(priv->timer_delay_us);
+	else
+		/* Give-up CPU to improve polling threads scheduling. */
+		sched_yield();
 }
 
 static void *
@@ -277,7 +260,7 @@ mlx5_vdpa_poll_handle(void *arg)
 		pthread_mutex_lock(&priv->vq_config_lock);
 		for (i = 0; i < priv->nr_virtqs; i++) {
 			cq = &priv->virtqs[i].eqp.cq;
-			if (cq->cq && !cq->armed) {
+			if (cq->cq_obj.cq && !cq->armed) {
 				uint32_t comp = mlx5_vdpa_cq_poll(cq);
 
 				if (comp) {
@@ -356,7 +339,7 @@ mlx5_vdpa_interrupt_handler(void *cb_arg)
 			DRV_LOG(DEBUG, "Device %s virtq %d cq %d event was captured."
 				" Timer is %s, cq ci is %u.\n",
 				priv->vdev->device->name,
-				(int)virtq->index, cq->cq->id,
+				(int)virtq->index, cq->cq_obj.cq->id,
 				priv->timer_on ? "on" : "off", cq->cq_ci);
 			cq->armed = 0;
 		}
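The cq_poll changes rely on the CQ being created with use_first_only, so only CQE[0] is ever rewritten and progress is tracked through its 16-bit wqe_counter. Because the subtraction is done in uint16_t, `cur + 1 - next` stays correct across counter wraparound, which is what allowed the old cq_mask masking and the owner-bit assertion to go away. A self-contained check of the wraparound property:

#include <stdint.h>
#include <stdio.h>

/* Completions between the last seen index and the reported counter,
 * computed modulo 2^16 so wraparound needs no special casing.
 */
static uint16_t completions(uint16_t cur_wqe_counter, uint16_t next_wqe_counter)
{
	return (uint16_t)(cur_wqe_counter + 1u - next_wqe_counter);
}

int main(void)
{
	/* No wrap: counter 9 reported, next expected 5 -> 5 completions. */
	printf("%u\n", completions(9, 5));
	/* Wrap: counter 2 reported, next expected 0xfffe -> 5 completions. */
	printf("%u\n", completions(2, 0xfffe));
	return 0;
}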
"on" : "off", cq->cq_ci); cq->armed = 0; } @@ -373,11 +356,157 @@ mlx5_vdpa_interrupt_handler(void *cb_arg) pthread_mutex_unlock(&priv->vq_config_lock); } +static void +mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused) +{ +#ifdef HAVE_IBV_DEVX_EVENT + struct mlx5_vdpa_priv *priv = cb_arg; + union { + struct mlx5dv_devx_async_event_hdr event_resp; + uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128]; + } out; + uint32_t vq_index, i, version; + struct mlx5_vdpa_virtq *virtq; + uint64_t sec; + + pthread_mutex_lock(&priv->vq_config_lock); + while (mlx5_glue->devx_get_event(priv->err_chnl, &out.event_resp, + sizeof(out.buf)) >= + (ssize_t)sizeof(out.event_resp.cookie)) { + vq_index = out.event_resp.cookie & UINT32_MAX; + version = out.event_resp.cookie >> 32; + if (vq_index >= priv->nr_virtqs) { + DRV_LOG(ERR, "Invalid device %s error event virtq %d.", + priv->vdev->device->name, vq_index); + continue; + } + virtq = &priv->virtqs[vq_index]; + if (!virtq->enable || virtq->version != version) + continue; + if (rte_rdtsc() / rte_get_tsc_hz() < MLX5_VDPA_ERROR_TIME_SEC) + continue; + virtq->stopped = true; + /* Query error info. */ + if (mlx5_vdpa_virtq_query(priv, vq_index)) + goto log; + /* Disable vq. */ + if (mlx5_vdpa_virtq_enable(priv, vq_index, 0)) { + DRV_LOG(ERR, "Failed to disable virtq %d.", vq_index); + goto log; + } + /* Retry if error happens less than N times in 3 seconds. */ + sec = (rte_rdtsc() - virtq->err_time[0]) / rte_get_tsc_hz(); + if (sec > MLX5_VDPA_ERROR_TIME_SEC) { + /* Retry. */ + if (mlx5_vdpa_virtq_enable(priv, vq_index, 1)) + DRV_LOG(ERR, "Failed to enable virtq %d.", + vq_index); + else + DRV_LOG(WARNING, "Recover virtq %d: %u.", + vq_index, ++virtq->n_retry); + } else { + /* Retry timeout, give up. */ + DRV_LOG(ERR, "Device %s virtq %d failed to recover.", + priv->vdev->device->name, vq_index); + } +log: + /* Shift in current time to error time log end. */ + for (i = 1; i < RTE_DIM(virtq->err_time); i++) + virtq->err_time[i - 1] = virtq->err_time[i]; + virtq->err_time[RTE_DIM(virtq->err_time) - 1] = rte_rdtsc(); + } + pthread_mutex_unlock(&priv->vq_config_lock); +#endif +} + int -mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv) +mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv) { + int ret; int flags; + + /* Setup device event channel. 
 int
-mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
+mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv)
 {
+	int ret;
 	int flags;
+
+	/* Setup device event channel. */
+	priv->err_chnl = mlx5_glue->devx_create_event_channel(priv->ctx, 0);
+	if (!priv->err_chnl) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create device event channel %d.",
+			rte_errno);
+		goto error;
+	}
+	flags = fcntl(priv->err_chnl->fd, F_GETFL);
+	ret = fcntl(priv->err_chnl->fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to change device event channel FD.");
+		goto error;
+	}
+	priv->err_intr_handle.fd = priv->err_chnl->fd;
+	priv->err_intr_handle.type = RTE_INTR_HANDLE_EXT;
+	if (rte_intr_callback_register(&priv->err_intr_handle,
+				       mlx5_vdpa_err_interrupt_handler,
+				       priv)) {
+		priv->err_intr_handle.fd = 0;
+		DRV_LOG(ERR, "Failed to register error interrupt for device %d.",
+			priv->vid);
+		goto error;
+	} else {
+		DRV_LOG(DEBUG, "Registered error interrupt for device%d.",
+			priv->vid);
+	}
+	return 0;
+error:
+	mlx5_vdpa_err_event_unset(priv);
+	return -1;
+}
+
+void
+mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv)
+{
+	int retries = MLX5_VDPA_INTR_RETRIES;
+	int ret = -EAGAIN;
+
+	if (!priv->err_intr_handle.fd)
+		return;
+	while (retries-- && ret == -EAGAIN) {
+		ret = rte_intr_callback_unregister(&priv->err_intr_handle,
+					    mlx5_vdpa_err_interrupt_handler,
+					    priv);
+		if (ret == -EAGAIN) {
+			DRV_LOG(DEBUG, "Try again to unregister fd %d "
+				"of error interrupt, retries = %d.",
+				priv->err_intr_handle.fd, retries);
+			rte_pause();
+		}
+	}
+	memset(&priv->err_intr_handle, 0, sizeof(priv->err_intr_handle));
+	if (priv->err_chnl) {
+#ifdef HAVE_IBV_DEVX_EVENT
+		union {
+			struct mlx5dv_devx_async_event_hdr event_resp;
+			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) +
+				    128];
+		} out;
+
+		/* Clean all pending events. */
+		while (mlx5_glue->devx_get_event(priv->err_chnl,
+		       &out.event_resp, sizeof(out.buf)) >=
+		       (ssize_t)sizeof(out.event_resp.cookie))
+			;
+#endif
+		mlx5_glue->devx_destroy_event_channel(priv->err_chnl);
+		priv->err_chnl = NULL;
+	}
+}
+
+int
+mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
+{
 	int ret;
+	rte_cpuset_t cpuset;
+	pthread_attr_t attr;
+	char name[16];
+	const struct sched_param sp = {
+		.sched_priority = sched_get_priority_max(SCHED_RR),
+	};
 
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
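mlx5_vdpa_err_event_unset() has to tolerate rte_intr_callback_unregister() returning -EAGAIN while the callback is still executing, so it retries a bounded number of times before giving up. The same pattern, sketched with a hypothetical try_unregister() standing in for the rte_intr call and sched_yield() in place of rte_pause():

#include <errno.h>
#include <sched.h>
#include <stdio.h>

#define INTR_RETRIES 256 /* assumed stand-in for MLX5_VDPA_INTR_RETRIES */

/* Hypothetical unregister hook standing in for rte_intr_callback_unregister:
 * returns -EAGAIN while the callback is still running on another thread.
 */
static int try_unregister(void)
{
	static int busy = 2;

	return busy-- > 0 ? -EAGAIN : 0;
}

int main(void)
{
	int retries = INTR_RETRIES;
	int ret = -EAGAIN;

	while (retries-- && ret == -EAGAIN) {
		ret = try_unregister();
		if (ret == -EAGAIN)
			sched_yield(); /* the driver uses rte_pause() here */
	}
	printf("unregister %s\n", ret ? "failed" : "done");
	return ret != 0;
}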
@@ -386,18 +515,40 @@ mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
 		pthread_mutex_init(&priv->timer_lock, NULL);
 		pthread_cond_init(&priv->timer_cond, NULL);
 		priv->timer_on = 0;
-		ret = pthread_create(&priv->timer_tid, NULL,
+		pthread_attr_init(&attr);
+		ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
+			return -1;
+		}
+		ret = pthread_attr_setschedparam(&attr, &sp);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to set thread priority.");
+			return -1;
+		}
+		ret = pthread_create(&priv->timer_tid, &attr,
 				     mlx5_vdpa_poll_handle, (void *)priv);
 		if (ret) {
 			DRV_LOG(ERR, "Failed to create timer thread.");
 			return -1;
 		}
-	}
-	flags = fcntl(priv->eventc->fd, F_GETFL);
-	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to change event channel FD.");
-		goto error;
+		CPU_ZERO(&cpuset);
+		if (priv->event_core != -1)
+			CPU_SET(priv->event_core, &cpuset);
+		else
+			cpuset = rte_lcore_cpuset(rte_get_main_lcore());
+		ret = pthread_setaffinity_np(priv->timer_tid, sizeof(cpuset),
+					     &cpuset);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to set thread affinity.");
+			goto error;
+		}
+		snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
+		ret = pthread_setname_np(priv->timer_tid, name);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to set timer thread name.");
+			return -1;
+		}
 	}
 	priv->intr_handle.fd = priv->eventc->fd;
 	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
@@ -512,6 +663,7 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
 	if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq))
 		return -1;
 	attr.pd = priv->pdn;
+	attr.ts_format = mlx5_ts_format_conv(priv->qp_ts_format);
 	eqp->fw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr);
 	if (!eqp->fw_qp) {
 		DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno);
@@ -532,7 +684,7 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
 		goto error;
 	}
 	attr.uar_index = priv->uar->page_id;
-	attr.cqn = eqp->cq.cq->id;
+	attr.cqn = eqp->cq.cq_obj.cq->id;
 	attr.log_page_size = rte_log2_u32(sysconf(_SC_PAGESIZE));
 	attr.rq_size = 1 << log_desc_n;
 	attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);
@@ -542,6 +694,7 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
 	attr.wq_umem_offset = 0;
 	attr.dbr_umem_id = eqp->umem_obj->umem_id;
 	attr.dbr_address = (1 << log_desc_n) * MLX5_WSEG_SIZE;
+	attr.ts_format = mlx5_ts_format_conv(priv->qp_ts_format);
 	eqp->sw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr);
 	if (!eqp->sw_qp) {
 		DRV_LOG(ERR, "Failed to create SW QP(%u).", rte_errno);
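The last hunks move the polling thread to the SCHED_RR class at maximum priority, pin it to the devarg-selected core (or the main lcore's cpuset), and name it. A standalone pthread sketch of that sequence; the core number and thread name are illustrative, and unlike the driver it sets PTHREAD_EXPLICIT_SCHED so the attribute actually takes effect (an RT policy normally needs CAP_SYS_NICE):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *worker(void *arg)
{
	(void)arg;
	/* Stand-in for the driver's CQ poll loop. */
	for (int i = 0; i < 1000; i++)
		sched_yield();
	return NULL;
}

int main(void)
{
	pthread_attr_t attr;
	pthread_t tid;
	cpu_set_t cpuset;
	struct sched_param sp = {
		.sched_priority = sched_get_priority_max(SCHED_RR),
	};

	pthread_attr_init(&attr);
	/* Explicit scheduling, RR policy, max priority (needs privileges). */
	pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&attr, SCHED_RR);
	pthread_attr_setschedparam(&attr, &sp);
	if (pthread_create(&tid, &attr, worker, NULL)) {
		perror("pthread_create");
		return 1;
	}
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset); /* pin to core 0 for the example */
	pthread_setaffinity_np(tid, sizeof(cpuset), &cpuset);
	pthread_setname_np(tid, "vdpa-poll"); /* name <= 15 chars + NUL */
	pthread_join(tid, NULL);
	return 0;
}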