X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fvdpa%2Fmlx5%2Fmlx5_vdpa.c;h=6d17d7a6f3e3bb4e746e64ad840ff8db7a839b4b;hb=4ad8bc2fc7857a92aa0bb147529859358c5acc4f;hp=1bb6c687e5b9b21574a5c04f6c15f5a58c8816cc;hpb=9d39e57f21acef852d5d3187d79ca5e9667f67b7;p=dpdk.git

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 1bb6c687e5..6d17d7a6f3 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -1,55 +1,71 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright 2019 Mellanox Technologies, Ltd
  */
+#include
+#include
+#include
+#include
+#include
+#include
+
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
+#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
+
 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
 			    (1ULL << VIRTIO_F_ANY_LAYOUT) | \
 			    (1ULL << VIRTIO_NET_F_MQ) | \
 			    (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
 			    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
-			    (1ULL << VHOST_F_LOG_ALL))
+			    (1ULL << VHOST_F_LOG_ALL) | \
+			    (1ULL << VIRTIO_NET_F_MTU))
 
 #define MLX5_VDPA_PROTOCOL_FEATURES \
 			    ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
 			     (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
 			     (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
 			     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
-			     (1ULL << VHOST_USER_PROTOCOL_F_MQ))
+			     (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			     (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			     (1ULL << VHOST_USER_PROTOCOL_F_STATUS))
+
+#define MLX5_VDPA_MAX_RETRIES 20
+#define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
 static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
-int mlx5_vdpa_logtype;
 
 static struct mlx5_vdpa_priv *
-mlx5_vdpa_find_priv_resource_by_did(int did)
+mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
 {
 	struct mlx5_vdpa_priv *priv;
 	int found = 0;
 
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_FOREACH(priv, &priv_list, next) {
-		if (did == priv->id) {
+		if (vdev == priv->vdev) {
 			found = 1;
 			break;
 		}
 	}
 	pthread_mutex_unlock(&priv_list_lock);
 	if (!found) {
-		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
 		rte_errno = EINVAL;
 		return NULL;
 	}
@@ -57,12 +73,13 @@ mlx5_vdpa_find_priv_resource_by_did(int did)
 }
 
 static int
-mlx5_vdpa_get_queue_num(int did, uint32_t *queue_num)
+mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
 {
-	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 
 	if (priv == NULL) {
-		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
 		return -1;
 	}
 	*queue_num = priv->caps.max_num_virtio_queues;
@@ -70,12 +87,13 @@ mlx5_vdpa_get_queue_num(int did, uint32_t *queue_num)
 }
 
 static int
-mlx5_vdpa_get_vdpa_features(int did, uint64_t *features)
+mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
-	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 
 	if (priv == NULL) {
-		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
 		return -1;
 	}
 	*features = MLX5_VDPA_DEFAULT_FEATURES;
@@ -95,12 +113,14 @@ mlx5_vdpa_get_vdpa_features(int did, uint64_t *features)
 }
 
 static int
-mlx5_vdpa_get_protocol_features(int did, uint64_t *features)
+mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
+				uint64_t *features)
 {
-	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 
 	if (priv == NULL) {
-		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
 		return -1;
 	}
 	*features = MLX5_VDPA_PROTOCOL_FEATURES;
@@ -110,35 +130,37 @@ mlx5_vdpa_get_protocol_features(int did, uint64_t *features)
 
 static int
 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
 {
-	int did = rte_vhost_get_vdpa_device_id(vid);
-	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
-	struct mlx5_vdpa_virtq *virtq = NULL;
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+	int ret;
 
 	if (priv == NULL) {
-		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
 		return -EINVAL;
 	}
-	SLIST_FOREACH(virtq, &priv->virtq_list, next)
-		if (virtq->index == vring)
-			break;
-	if (!virtq) {
-		DRV_LOG(ERR, "Invalid or unconfigured vring id: %d.", vring);
-		return -EINVAL;
+	if (vring >= (int)priv->caps.max_num_virtio_queues * 2) {
+		DRV_LOG(ERR, "Too big vring id: %d.", vring);
+		return -E2BIG;
 	}
-	return mlx5_vdpa_virtq_enable(virtq, state);
+	pthread_mutex_lock(&priv->vq_config_lock);
+	ret = mlx5_vdpa_virtq_enable(priv, vring, state);
+	pthread_mutex_unlock(&priv->vq_config_lock);
+	return ret;
 }
 
 static int
 mlx5_vdpa_features_set(int vid)
 {
-	int did = rte_vhost_get_vdpa_device_id(vid);
-	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 	uint64_t log_base, log_size;
 	uint64_t features;
 	int ret;
 
 	if (priv == NULL) {
-		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
 		return -EINVAL;
 	}
 	ret = rte_vhost_get_negotiated_features(vid, &features);
@@ -167,222 +189,626 @@ mlx5_vdpa_features_set(int vid)
 	return 0;
 }
 
+static int
+mlx5_vdpa_pd_create(struct mlx5_vdpa_priv *priv)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	priv->pd = mlx5_glue->alloc_pd(priv->ctx);
+	if (priv->pd == NULL) {
+		DRV_LOG(ERR, "Failed to allocate PD.");
+		return errno ? -errno : -ENOMEM;
+	}
+	struct mlx5dv_obj obj;
+	struct mlx5dv_pd pd_info;
+	int ret = 0;
+
+	obj.pd.in = priv->pd;
+	obj.pd.out = &pd_info;
+	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
+	if (ret) {
+		DRV_LOG(ERR, "Fail to get PD object info.");
+		mlx5_glue->dealloc_pd(priv->pd);
+		priv->pd = NULL;
+		return -errno;
+	}
+	priv->pdn = pd_info.pdn;
+	return 0;
+#else
+	(void)priv;
+	DRV_LOG(ERR, "Cannot get pdn - no DV support.");
+	return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+}
+
+static int
+mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
+{
+	struct ifreq request;
+	uint16_t vhost_mtu = 0;
+	uint16_t kern_mtu = 0;
+	int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
+	int sock;
+	int retries = MLX5_VDPA_MAX_RETRIES;
+
+	if (ret) {
+		DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
+		return ret;
+	}
+	if (!vhost_mtu) {
+		DRV_LOG(DEBUG, "Vhost MTU is 0.");
+		return ret;
+	}
+	ret = mlx5_get_ifname_sysfs(priv->ctx->device->ibdev_path,
+				    request.ifr_name);
+	if (ret) {
+		DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
+		return ret;
+	}
+	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+	if (sock == -1) {
+		DRV_LOG(DEBUG, "Cannot open IF socket.");
+		return sock;
+	}
+	while (retries--) {
+		ret = ioctl(sock, SIOCGIFMTU, &request);
+		if (ret == -1)
+			break;
+		kern_mtu = request.ifr_mtu;
+		DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
+			(int)vhost_mtu);
+		if (kern_mtu == vhost_mtu)
+			break;
+		request.ifr_mtu = vhost_mtu;
+		ret = ioctl(sock, SIOCSIFMTU, &request);
+		if (ret == -1)
+			break;
+		request.ifr_mtu = 0;
+		usleep(MLX5_VDPA_USEC);
+	}
+	close(sock);
+	return kern_mtu == vhost_mtu ? 0 : -1;
+}
+
+static int
+mlx5_vdpa_dev_close(int vid)
+{
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+	int ret = 0;
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
+		return -1;
+	}
+	if (priv->configured)
+		ret |= mlx5_vdpa_lm_log(priv);
+	mlx5_vdpa_err_event_unset(priv);
+	mlx5_vdpa_cqe_event_unset(priv);
+	mlx5_vdpa_steer_unset(priv);
+	mlx5_vdpa_virtqs_release(priv);
+	mlx5_vdpa_event_qp_global_release(priv);
+	mlx5_vdpa_mem_dereg(priv);
+	if (priv->pd) {
+		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
+		priv->pd = NULL;
+	}
+	priv->configured = 0;
+	priv->vid = 0;
+	/* The mutex may stay locked after event thread cancel - initiate it. */
+	pthread_mutex_init(&priv->vq_config_lock, NULL);
+	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
+	return ret;
+}
+
+static int
+mlx5_vdpa_dev_config(int vid)
+{
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
+		return -EINVAL;
+	}
+	if (priv->configured && mlx5_vdpa_dev_close(vid)) {
+		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
+		return -1;
+	}
+	priv->vid = vid;
+	if (mlx5_vdpa_mtu_set(priv))
+		DRV_LOG(WARNING, "MTU cannot be set on device %s.",
+			vdev->device->name);
+	if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) ||
+	    mlx5_vdpa_err_event_setup(priv) ||
+	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
+	    mlx5_vdpa_cqe_event_setup(priv)) {
+		mlx5_vdpa_dev_close(vid);
+		return -1;
+	}
+	priv->configured = 1;
+	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
+	return 0;
+}
+
+static int
+mlx5_vdpa_get_device_fd(int vid)
+{
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
+		return -EINVAL;
+	}
+	return priv->ctx->cmd_fd;
+}
+
+static int
+mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
+{
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+
+	RTE_SET_USED(qid);
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
+		return -EINVAL;
+	}
+	if (!priv->var) {
+		DRV_LOG(ERR, "VAR was not created for device %s, is the device"
+			" configured?.", vdev->device->name);
+		return -EINVAL;
+	}
+	*offset = priv->var->mmap_off;
+	*size = priv->var->length;
+	return 0;
+}
+
+static int
+mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
+			  struct rte_vdpa_stat_name *stats_names,
+			  unsigned int size)
+{
+	static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
+		"received_descriptors",
+		"completed_descriptors",
+		"bad descriptor errors",
+		"exceed max chain",
+		"invalid buffer",
+		"completion errors",
+	};
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+	unsigned int i;
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
+		return -ENODEV;
+	}
+	if (!stats_names)
+		return MLX5_VDPA_STATS_MAX;
+	size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
+	for (i = 0; i < size; ++i)
+		strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
+			RTE_VDPA_STATS_NAME_SIZE);
+	return size;
+}
+
+static int
+mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
+		    struct rte_vdpa_stat *stats, unsigned int n)
+{
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
+		return -ENODEV;
+	}
+	if (!priv->configured) {
+		DRV_LOG(ERR, "Device %s was not configured.",
+			vdev->device->name);
+		return -ENODATA;
+	}
+	if (qid >= (int)priv->nr_virtqs) {
+		DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
+			vdev->device->name);
+		return -E2BIG;
+	}
+	if (!priv->caps.queue_counters_valid) {
+		DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
+			vdev->device->name);
+		return -ENOTSUP;
+	}
+	return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
+}
+
+static int
+mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
+{
+	struct mlx5_vdpa_priv *priv =
+		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
+		return -ENODEV;
+	}
+	if (!priv->configured) {
+		DRV_LOG(ERR, "Device %s was not configured.",
+			vdev->device->name);
+		return -ENODATA;
+	}
+	if (qid >= (int)priv->nr_virtqs) {
+		DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
+			vdev->device->name);
+		return -E2BIG;
+	}
+	if (!priv->caps.queue_counters_valid) {
+		DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
+			vdev->device->name);
+		return -ENOTSUP;
+	}
+	return mlx5_vdpa_virtq_stats_reset(priv, qid);
+}
+
 static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
 	.get_queue_num = mlx5_vdpa_get_queue_num,
 	.get_features = mlx5_vdpa_get_vdpa_features,
 	.get_protocol_features = mlx5_vdpa_get_protocol_features,
-	.dev_conf = NULL,
-	.dev_close = NULL,
+	.dev_conf = mlx5_vdpa_dev_config,
+	.dev_close = mlx5_vdpa_dev_close,
 	.set_vring_state = mlx5_vdpa_set_vring_state,
 	.set_features = mlx5_vdpa_features_set,
 	.migration_done = NULL,
 	.get_vfio_group_fd = NULL,
-	.get_vfio_device_fd = NULL,
-	.get_notify_area = NULL,
+	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
+	.get_notify_area = mlx5_vdpa_get_notify_area,
+	.get_stats_names = mlx5_vdpa_get_stats_names,
+	.get_stats = mlx5_vdpa_get_stats,
+	.reset_stats = mlx5_vdpa_reset_stats,
 };
 
-/**
- * DPDK callback to register a PCI device.
- *
- * This function spawns vdpa device out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_vpda_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, 1 to skip this driver, a negative errno value otherwise
- *   and rte_errno is set.
- */
+/* Try to disable ROCE by Netlink\Devlink. */
+static int
+mlx5_vdpa_nl_roce_disable(const char *addr)
+{
+	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+	int devlink_id;
+	int enable;
+	int ret;
+
+	if (nlsk_fd < 0)
+		return nlsk_fd;
+	devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
+	if (devlink_id < 0) {
+		ret = devlink_id;
+		DRV_LOG(DEBUG, "Failed to get devlink id for ROCE operations by"
+			" Netlink.");
+		goto close;
+	}
+	ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
+	if (ret) {
+		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+			ret);
+		goto close;
+	} else if (!enable) {
+		DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+		goto close;
+	}
+	ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
+	if (ret)
+		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
+	else
+		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+close:
+	close(nlsk_fd);
+	return ret;
+}
+
+/* Try to disable ROCE by sysfs. */
+static int
+mlx5_vdpa_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fscanf(file_o, "%d", &enable);
+	if (ret != 1) {
+		rte_errno = EINVAL;
+		ret = EINVAL;
+		goto close;
+	} else if (!enable) {
+		ret = 0;
+		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+		goto close;
+	}
+	fclose(file_o);
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	fprintf(file_o, "0\n");
+	ret = 0;
+close:
+	if (ret)
+		DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	else
+		DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	fclose(file_o);
+	return ret;
+}
+
+static int
+mlx5_vdpa_roce_disable(struct rte_device *dev)
+{
+	char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
+
+	if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
+		return -rte_errno;
+	/* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
+	if (mlx5_vdpa_nl_roce_disable(pci_addr) != 0 &&
+	    mlx5_vdpa_sys_roce_disable(pci_addr) != 0)
+		return -rte_errno;
+	return 0;
+}
+
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long tmp;
+	int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+
+	if (strcmp(key, RTE_DEVARGS_KEY_CLASS) == 0)
+		return 0;
+	errno = 0;
+	tmp = strtoul(val, NULL, 0);
+	if (errno) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -errno;
+	}
+	if (strcmp(key, "event_mode") == 0) {
+		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+			priv->event_mode = (int)tmp;
+		else
+			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+	} else if (strcmp(key, "event_us") == 0) {
+		priv->event_us = (uint32_t)tmp;
+	} else if (strcmp(key, "no_traffic_time") == 0) {
+		priv->no_traffic_max = (uint32_t)tmp;
+	} else if (strcmp(key, "event_core") == 0) {
+		if (tmp >= (unsigned long)n_cores)
+			DRV_LOG(WARNING, "Invalid event_core %s.", val);
+		else
+			priv->event_core = tmp;
+	} else if (strcmp(key, "hw_latency_mode") == 0) {
+		priv->hw_latency_mode = (uint32_t)tmp;
+	} else if (strcmp(key, "hw_max_latency_us") == 0) {
+		priv->hw_max_latency_us = (uint32_t)tmp;
+	} else if (strcmp(key, "hw_max_pending_comp") == 0) {
+		priv->hw_max_pending_comp = (uint32_t)tmp;
+	} else {
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+	}
+	return 0;
+}
+
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *kvlist;
+
+	priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
+	priv->event_us = 0;
+	priv->event_core = -1;
+	priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
+	if (devargs == NULL)
+		return;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return;
+	rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+	rte_kvargs_free(kvlist);
+	if (!priv->event_us &&
+	    priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+		priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+	DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
+}
+
 static int
-mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-		    struct rte_pci_device *pci_dev __rte_unused)
+mlx5_vdpa_dev_probe(struct rte_device *dev)
 {
-	struct ibv_device **ibv_list;
-	struct ibv_device *ibv_match = NULL;
+	struct ibv_device *ibv;
 	struct mlx5_vdpa_priv *priv = NULL;
 	struct ibv_context *ctx = NULL;
 	struct mlx5_hca_attr attr;
+	int retry;
 	int ret;
 
-	if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_VDPA) {
-		DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
-			" driver.");
-		return 1;
-	}
-	errno = 0;
-	ibv_list = mlx5_glue->get_device_list(&ret);
-	if (!ibv_list) {
-		rte_errno = ENOSYS;
-		DRV_LOG(ERR, "Failed to get device list, is ib_uverbs loaded?");
+	if (mlx5_vdpa_roce_disable(dev) != 0) {
+		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+			dev->name);
 		return -rte_errno;
 	}
-	while (ret-- > 0) {
-		struct rte_pci_addr pci_addr;
-
-		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]->name);
-		if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path, &pci_addr))
-			continue;
-		if (pci_dev->addr.domain != pci_addr.domain ||
-		    pci_dev->addr.bus != pci_addr.bus ||
-		    pci_dev->addr.devid != pci_addr.devid ||
-		    pci_dev->addr.function != pci_addr.function)
-			continue;
-		DRV_LOG(INFO, "PCI information matches for device \"%s\".",
-			ibv_list[ret]->name);
-		ibv_match = ibv_list[ret];
-		break;
-	}
-	mlx5_glue->free_device_list(ibv_list);
-	if (!ibv_match) {
-		DRV_LOG(ERR, "No matching IB device for PCI slot "
-			"%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
-			pci_dev->addr.domain, pci_dev->addr.bus,
-			pci_dev->addr.devid, pci_dev->addr.function);
-		rte_errno = ENOENT;
+	/* Wait for the IB device to appear again after reload. */
+	for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
+		ibv = mlx5_os_get_ibv_dev(dev);
+		if (ibv != NULL)
+			break;
+		usleep(MLX5_VDPA_USEC);
+	}
+	if (ibv == NULL) {
+		DRV_LOG(ERR, "Cannot get IB device after disabling RoCE for "
+			"\"%s\", retries exceed %d.",
+			dev->name, MLX5_VDPA_MAX_RETRIES);
+		rte_errno = EAGAIN;
 		return -rte_errno;
 	}
-	ctx = mlx5_glue->dv_open_device(ibv_match);
+	ctx = mlx5_glue->dv_open_device(ibv);
 	if (!ctx) {
-		DRV_LOG(ERR, "Failed to open IB device \"%s\".",
-			ibv_match->name);
+		DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
 		rte_errno = ENODEV;
 		return -rte_errno;
 	}
-	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv),
+	ret = mlx5_devx_cmd_query_hca_attr(ctx, &attr);
+	if (ret) {
+		DRV_LOG(ERR, "Unable to read HCA capabilities.");
+		rte_errno = ENOTSUP;
+		goto error;
+	} else if (!attr.vdpa.valid || !attr.vdpa.max_num_virtio_queues) {
+		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
+			"old FW/OFED version?");
+		rte_errno = ENOTSUP;
+		goto error;
+	}
+	if (!attr.vdpa.queue_counters_valid)
+		DRV_LOG(DEBUG, "No capability to support virtq statistics.");
+	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
+			   sizeof(struct mlx5_vdpa_virtq) *
+			   attr.vdpa.max_num_virtio_queues * 2,
 			   RTE_CACHE_LINE_SIZE);
 	if (!priv) {
 		DRV_LOG(ERR, "Failed to allocate private memory.");
 		rte_errno = ENOMEM;
 		goto error;
 	}
-	ret = mlx5_devx_cmd_query_hca_attr(ctx, &attr);
-	if (ret) {
-		DRV_LOG(ERR, "Unable to read HCA capabilities.");
-		rte_errno = ENOTSUP;
+	priv->caps = attr.vdpa;
+	priv->log_max_rqt_size = attr.log_max_rqt_size;
+	priv->num_lag_ports = attr.num_lag_ports;
+	priv->qp_ts_format = attr.qp_ts_format;
+	if (attr.num_lag_ports == 0)
+		priv->num_lag_ports = 1;
+	priv->ctx = ctx;
+	priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
+	if (!priv->var) {
+		DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
 		goto error;
-	} else {
-		if (!attr.vdpa.valid || !attr.vdpa.max_num_virtio_queues) {
-			DRV_LOG(ERR, "Not enough capabilities to support vdpa,"
-				" maybe old FW/OFED version?");
version?"); - rte_errno = ENOTSUP; - goto error; - } - priv->caps = attr.vdpa; - priv->log_max_rqt_size = attr.log_max_rqt_size; } - priv->ctx = ctx; - priv->dev_addr.pci_addr = pci_dev->addr; - priv->dev_addr.type = PCI_ADDR; - priv->id = rte_vdpa_register_device(&priv->dev_addr, &mlx5_vdpa_ops); - if (priv->id < 0) { + priv->vdev = rte_vdpa_register_device(dev, &mlx5_vdpa_ops); + if (priv->vdev == NULL) { DRV_LOG(ERR, "Failed to register vDPA device."); rte_errno = rte_errno ? rte_errno : EINVAL; goto error; } + mlx5_vdpa_config_get(dev->devargs, priv); SLIST_INIT(&priv->mr_list); - SLIST_INIT(&priv->virtq_list); + pthread_mutex_init(&priv->vq_config_lock, NULL); pthread_mutex_lock(&priv_list_lock); TAILQ_INSERT_TAIL(&priv_list, priv, next); pthread_mutex_unlock(&priv_list_lock); return 0; error: - if (priv) + if (priv) { + if (priv->var) + mlx5_glue->dv_free_var(priv->var); rte_free(priv); + } if (ctx) mlx5_glue->close_device(ctx); return -rte_errno; } -/** - * DPDK callback to remove a PCI device. - * - * This function removes all vDPA devices belong to a given PCI device. - * - * @param[in] pci_dev - * Pointer to the PCI device. - * - * @return - * 0 on success, the function cannot fail. - */ static int -mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev) +mlx5_vdpa_dev_remove(struct rte_device *dev) { struct mlx5_vdpa_priv *priv = NULL; int found = 0; pthread_mutex_lock(&priv_list_lock); TAILQ_FOREACH(priv, &priv_list, next) { - if (memcmp(&priv->dev_addr.pci_addr, &pci_dev->addr, - sizeof(pci_dev->addr)) == 0) { + if (priv->vdev->device == dev) { found = 1; break; } } - if (found) { + if (found) TAILQ_REMOVE(&priv_list, priv, next); + pthread_mutex_unlock(&priv_list_lock); + if (found) { + if (priv->configured) + mlx5_vdpa_dev_close(priv->vid); + if (priv->var) { + mlx5_glue->dv_free_var(priv->var); + priv->var = NULL; + } + if (priv->vdev) + rte_vdpa_unregister_device(priv->vdev); mlx5_glue->close_device(priv->ctx); + pthread_mutex_destroy(&priv->vq_config_lock); rte_free(priv); } - pthread_mutex_unlock(&priv_list_lock); return 0; } static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) + PCI_DEVICE_ID_MELLANOX_CONNECTX6) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF) + PCI_DEVICE_ID_MELLANOX_CONNECTX6VF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX6) + PCI_DEVICE_ID_MELLANOX_CONNECTX6DX) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX6VF) + PCI_DEVICE_ID_MELLANOX_CONNECTXVF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX6DX) + PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF) + PCI_DEVICE_ID_MELLANOX_CONNECTX7) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX7BF) }, { .vendor_id = 0 } }; -static struct rte_pci_driver mlx5_vdpa_driver = { - .driver = { - .name = "mlx5_vdpa", - }, +static struct mlx5_class_driver mlx5_vdpa_driver = { + .drv_class = MLX5_CLASS_VDPA, + .name = RTE_STR(MLX5_VDPA_DRIVER_NAME), .id_table = mlx5_vdpa_pci_id_map, - .probe = mlx5_vdpa_pci_probe, - .remove = mlx5_vdpa_pci_remove, - .drv_flags = 0, + .probe = mlx5_vdpa_dev_probe, + .remove = mlx5_vdpa_dev_remove, }; +RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE) + /** * Driver initialization routine. */ RTE_INIT(rte_mlx5_vdpa_init) { - /* Initialize common log type. 
-	mlx5_vdpa_logtype = rte_log_register("pmd.vdpa.mlx5");
-	if (mlx5_vdpa_logtype >= 0)
-		rte_log_set_level(mlx5_vdpa_logtype, RTE_LOG_NOTICE);
+	mlx5_common_init();
 	if (mlx5_glue)
-		rte_pci_register(&mlx5_vdpa_driver);
+		mlx5_class_driver_register(&mlx5_vdpa_driver);
 }
 
-RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__);
-RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib");
+RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");