X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fvdpa%2Fmlx5%2Fmlx5_vdpa.c;h=749c9d097cfad8b01124f670fde8e1221cc90ad4;hb=a729d2f093e9f7f13fa2e362a1a97bc2dc8f834a;hp=8b5bfd8c3d7f9f8ac0a21080e2762e4bb0526bdf;hpb=35d4f17b3d301469685442ebcc2108a5bd5e5bbe;p=dpdk.git diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c index 8b5bfd8c3d..749c9d097c 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c @@ -11,12 +11,12 @@ #include #include #include -#include #include +#include #include #include -#include +#include #include #include #include @@ -24,6 +24,7 @@ #include "mlx5_vdpa_utils.h" #include "mlx5_vdpa.h" +#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ (1ULL << VIRTIO_F_ANY_LAYOUT) | \ @@ -42,8 +43,6 @@ (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ (1ULL << VHOST_USER_PROTOCOL_F_STATUS)) -#define MLX5_VDPA_MAX_RETRIES 20 -#define MLX5_VDPA_USEC 1000 #define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list = @@ -189,37 +188,6 @@ mlx5_vdpa_features_set(int vid) return 0; } -static int -mlx5_vdpa_pd_create(struct mlx5_vdpa_priv *priv) -{ -#ifdef HAVE_IBV_FLOW_DV_SUPPORT - priv->pd = mlx5_glue->alloc_pd(priv->ctx); - if (priv->pd == NULL) { - DRV_LOG(ERR, "Failed to allocate PD."); - return errno ? -errno : -ENOMEM; - } - struct mlx5dv_obj obj; - struct mlx5dv_pd pd_info; - int ret = 0; - - obj.pd.in = priv->pd; - obj.pd.out = &pd_info; - ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); - if (ret) { - DRV_LOG(ERR, "Fail to get PD object info."); - mlx5_glue->dealloc_pd(priv->pd); - priv->pd = NULL; - return -errno; - } - priv->pdn = pd_info.pdn; - return 0; -#else - (void)priv; - DRV_LOG(ERR, "Cannot get pdn - no DV support."); - return -ENOTSUP; -#endif /* HAVE_IBV_FLOW_DV_SUPPORT */ -} - static int mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv) { @@ -238,8 +206,9 @@ mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv) DRV_LOG(DEBUG, "Vhost MTU is 0."); return ret; } - ret = mlx5_get_ifname_sysfs(priv->ctx->device->ibdev_path, - request.ifr_name); + ret = mlx5_get_ifname_sysfs + (mlx5_os_get_ctx_device_name(priv->cdev->ctx), + request.ifr_name); if (ret) { DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret); return ret; @@ -281,18 +250,14 @@ mlx5_vdpa_dev_close(int vid) DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name); return -1; } - if (priv->configured) - ret |= mlx5_vdpa_lm_log(priv); mlx5_vdpa_err_event_unset(priv); mlx5_vdpa_cqe_event_unset(priv); + if (priv->configured) + ret |= mlx5_vdpa_lm_log(priv); mlx5_vdpa_steer_unset(priv); mlx5_vdpa_virtqs_release(priv); mlx5_vdpa_event_qp_global_release(priv); mlx5_vdpa_mem_dereg(priv); - if (priv->pd) { - claim_zero(mlx5_glue->dealloc_pd(priv->pd)); - priv->pd = NULL; - } priv->configured = 0; priv->vid = 0; /* The mutex may stay locked after event thread cancel - initiate it. */ @@ -320,8 +285,7 @@ mlx5_vdpa_dev_config(int vid) if (mlx5_vdpa_mtu_set(priv)) DRV_LOG(WARNING, "MTU cannot be set on device %s.", vdev->device->name); - if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) || - mlx5_vdpa_err_event_setup(priv) || + if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_err_event_setup(priv) || mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) || mlx5_vdpa_cqe_event_setup(priv)) { mlx5_vdpa_dev_close(vid); @@ -343,7 +307,7 @@ mlx5_vdpa_get_device_fd(int vid) DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name); return -EINVAL; } - return priv->ctx->cmd_fd; + return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd; } static int @@ -472,115 +436,6 @@ static struct rte_vdpa_dev_ops mlx5_vdpa_ops = { .reset_stats = mlx5_vdpa_reset_stats, }; -/* Try to disable ROCE by Netlink\Devlink. */ -static int -mlx5_vdpa_nl_roce_disable(const char *addr) -{ - int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); - int devlink_id; - int enable; - int ret; - - if (nlsk_fd < 0) - return nlsk_fd; - devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd); - if (devlink_id < 0) { - ret = devlink_id; - DRV_LOG(DEBUG, "Failed to get devlink id for ROCE operations by" - " Netlink."); - goto close; - } - ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); - if (ret) { - DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", - ret); - goto close; - } else if (!enable) { - DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); - goto close; - } - ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); - if (ret) - DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); - else - DRV_LOG(INFO, "ROCE is disabled by Netlink successfully."); -close: - close(nlsk_fd); - return ret; -} - -/* Try to disable ROCE by sysfs. */ -static int -mlx5_vdpa_sys_roce_disable(const char *addr) -{ - FILE *file_o; - int enable; - int ret; - - MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); - file_o = fopen(file_p, "rb"); - if (!file_o) { - rte_errno = ENOTSUP; - return -ENOTSUP; - } - ret = fscanf(file_o, "%d", &enable); - if (ret != 1) { - rte_errno = EINVAL; - ret = EINVAL; - goto close; - } else if (!enable) { - ret = 0; - DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); - goto close; - } - fclose(file_o); - file_o = fopen(file_p, "wb"); - if (!file_o) { - rte_errno = ENOTSUP; - return -ENOTSUP; - } - fprintf(file_o, "0\n"); - ret = 0; -close: - if (ret) - DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); - else - DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); - fclose(file_o); - return ret; -} - -static int -mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv) -{ - char addr_name[64] = {0}; - - rte_pci_device_name(addr, addr_name, sizeof(addr_name)); - /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */ - if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 || - mlx5_vdpa_sys_roce_disable(addr_name) == 0) { - /* - * Succeed to disable ROCE, wait for the IB device to appear - * again after reload. - */ - int r; - struct ibv_device *ibv_new; - - for (r = MLX5_VDPA_MAX_RETRIES; r; r--) { - ibv_new = mlx5_os_get_ibv_device(addr); - if (ibv_new) { - *ibv = ibv_new; - return 0; - } - usleep(MLX5_VDPA_USEC); - } - DRV_LOG(ERR, "Cannot much device %s after ROCE disable, " - "retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES); - rte_errno = EAGAIN; - } - return -rte_errno; -} - static int mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque) { @@ -588,8 +443,6 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque) unsigned long tmp; int n_cores = sysconf(_SC_NPROCESSORS_ONLN); - if (strcmp(key, RTE_DEVARGS_KEY_CLASS) == 0) - return 0; errno = 0; tmp = strtoul(val, NULL, 0); if (errno) { @@ -616,28 +469,33 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque) priv->hw_max_latency_us = (uint32_t)tmp; } else if (strcmp(key, "hw_max_pending_comp") == 0) { priv->hw_max_pending_comp = (uint32_t)tmp; - } else { - DRV_LOG(WARNING, "Invalid key %s.", key); } return 0; } static void -mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv) +mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist, + struct mlx5_vdpa_priv *priv) { - struct rte_kvargs *kvlist; + const char **params = (const char *[]){ + "event_core", + "event_mode", + "event_us", + "hw_latency_mode", + "hw_max_latency_us", + "hw_max_pending_comp", + "no_traffic_time", + NULL, + }; priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER; priv->event_us = 0; priv->event_core = -1; priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX; - if (devargs == NULL) - return; - kvlist = rte_kvargs_parse(devargs->args, NULL); - if (kvlist == NULL) + if (mkvlist == NULL) return; - rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv); - rte_kvargs_free(kvlist); + mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler, + priv); if (!priv->event_us && priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US; @@ -646,95 +504,62 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv) DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max); } -/** - * DPDK callback to register a mlx5 PCI device. - * - * This function spawns vdpa device out of a given PCI device. - * - * @param[in] pci_drv - * PCI driver structure (mlx5_vpda_driver). - * @param[in] pci_dev - * PCI device information. - * - * @return - * 0 on success, 1 to skip this driver, a negative errno value otherwise - * and rte_errno is set. - */ static int -mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, - struct rte_pci_device *pci_dev __rte_unused) +mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev, + struct mlx5_kvargs_ctrl *mkvlist) { - struct ibv_device *ibv; struct mlx5_vdpa_priv *priv = NULL; - struct ibv_context *ctx = NULL; - struct mlx5_hca_attr attr; - int ret; + struct mlx5_hca_attr *attr = &cdev->config.hca_attr; + int retry; - ibv = mlx5_os_get_ibv_device(&pci_dev->addr); - if (!ibv) { - DRV_LOG(ERR, "No matching IB device for PCI slot " - PCI_PRI_FMT ".", pci_dev->addr.domain, - pci_dev->addr.bus, pci_dev->addr.devid, - pci_dev->addr.function); - return -rte_errno; - } else { - DRV_LOG(INFO, "PCI information matches for device \"%s\".", - ibv->name); - } - if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) { - DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", - ibv->name); - return -rte_errno; - } - ctx = mlx5_glue->dv_open_device(ibv); - if (!ctx) { - DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); - rte_errno = ENODEV; - return -rte_errno; - } - ret = mlx5_devx_cmd_query_hca_attr(ctx, &attr); - if (ret) { - DRV_LOG(ERR, "Unable to read HCA capabilities."); - rte_errno = ENOTSUP; - goto error; - } else if (!attr.vdpa.valid || !attr.vdpa.max_num_virtio_queues) { + if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) { DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe " "old FW/OFED version?"); rte_errno = ENOTSUP; - goto error; + return -rte_errno; } - if (!attr.vdpa.queue_counters_valid) + if (!attr->vdpa.queue_counters_valid) DRV_LOG(DEBUG, "No capability to support virtq statistics."); priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) + sizeof(struct mlx5_vdpa_virtq) * - attr.vdpa.max_num_virtio_queues * 2, + attr->vdpa.max_num_virtio_queues * 2, RTE_CACHE_LINE_SIZE); if (!priv) { DRV_LOG(ERR, "Failed to allocate private memory."); rte_errno = ENOMEM; - goto error; + return -rte_errno; } - priv->caps = attr.vdpa; - priv->log_max_rqt_size = attr.log_max_rqt_size; - priv->num_lag_ports = attr.num_lag_ports; - priv->qp_ts_format = attr.qp_ts_format; - if (attr.num_lag_ports == 0) + priv->caps = attr->vdpa; + priv->log_max_rqt_size = attr->log_max_rqt_size; + priv->num_lag_ports = attr->num_lag_ports; + if (attr->num_lag_ports == 0) priv->num_lag_ports = 1; - priv->ctx = ctx; - priv->pci_dev = pci_dev; - priv->var = mlx5_glue->dv_alloc_var(ctx, 0); + priv->cdev = cdev; + for (retry = 0; retry < 7; retry++) { + priv->var = mlx5_glue->dv_alloc_var(priv->cdev->ctx, 0); + if (priv->var != NULL) + break; + DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.\n", retry); + /* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */ + usleep(100000U << retry); + } if (!priv->var) { DRV_LOG(ERR, "Failed to allocate VAR %u.", errno); goto error; } - priv->vdev = rte_vdpa_register_device(&pci_dev->device, - &mlx5_vdpa_ops); + priv->err_intr_handle = + rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); + if (priv->err_intr_handle == NULL) { + DRV_LOG(ERR, "Fail to allocate intr_handle"); + goto error; + } + priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops); if (priv->vdev == NULL) { DRV_LOG(ERR, "Failed to register vDPA device."); rte_errno = rte_errno ? rte_errno : EINVAL; goto error; } - mlx5_vdpa_config_get(pci_dev->device.devargs, priv); + mlx5_vdpa_config_get(mkvlist, priv); SLIST_INIT(&priv->mr_list); pthread_mutex_init(&priv->vq_config_lock, NULL); pthread_mutex_lock(&priv_list_lock); @@ -746,33 +571,21 @@ error: if (priv) { if (priv->var) mlx5_glue->dv_free_var(priv->var); + rte_intr_instance_free(priv->err_intr_handle); rte_free(priv); } - if (ctx) - mlx5_glue->close_device(ctx); return -rte_errno; } -/** - * DPDK callback to remove a PCI device. - * - * This function removes all vDPA devices belong to a given PCI device. - * - * @param[in] pci_dev - * Pointer to the PCI device. - * - * @return - * 0 on success, the function cannot fail. - */ static int -mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev) +mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev) { struct mlx5_vdpa_priv *priv = NULL; int found = 0; pthread_mutex_lock(&priv_list_lock); TAILQ_FOREACH(priv, &priv_list, next) { - if (!rte_pci_addr_cmp(&priv->pci_dev->addr, &pci_dev->addr)) { + if (priv->vdev->device == cdev->dev) { found = 1; break; } @@ -789,8 +602,8 @@ mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev) } if (priv->vdev) rte_vdpa_unregister_device(priv->vdev); - mlx5_glue->close_device(priv->ctx); pthread_mutex_destroy(&priv->vq_config_lock); + rte_intr_instance_free(priv->err_intr_handle); rte_free(priv); } return 0; @@ -830,17 +643,12 @@ static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = { } }; -static struct mlx5_pci_driver mlx5_vdpa_driver = { - .driver_class = MLX5_CLASS_VDPA, - .pci_driver = { - .driver = { - .name = "mlx5_vdpa", - }, - .id_table = mlx5_vdpa_pci_id_map, - .probe = mlx5_vdpa_pci_probe, - .remove = mlx5_vdpa_pci_remove, - .drv_flags = 0, - }, +static struct mlx5_class_driver mlx5_vdpa_driver = { + .drv_class = MLX5_CLASS_VDPA, + .name = RTE_STR(MLX5_VDPA_DRIVER_NAME), + .id_table = mlx5_vdpa_pci_id_map, + .probe = mlx5_vdpa_dev_probe, + .remove = mlx5_vdpa_dev_remove, }; RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE) @@ -852,9 +660,9 @@ RTE_INIT(rte_mlx5_vdpa_init) { mlx5_common_init(); if (mlx5_glue) - mlx5_pci_driver_register(&mlx5_vdpa_driver); + mlx5_class_driver_register(&mlx5_vdpa_driver); } -RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__); -RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map); -RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib"); +RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map); +RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");