X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fvdpa%2Fmlx5%2Fmlx5_vdpa.c;h=1113d6cef027f6e3885ee76f721f3cdbd886ce86;hb=b58d8781fa1fa573f9d5e9af81a4288fddf1e0a8;hp=80204b32b73d5a844a5a1686cb47b4bf1ec4df9a;hpb=95276abaaf0a3e605e99a306f1923f46c3037a64;p=dpdk.git

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 80204b32b7..1113d6cef0 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -1,50 +1,432 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright 2019 Mellanox Technologies, Ltd
  */
+#include
+
 #include
 #include
 #include
 #include
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include
 #include
 #include
+#include
+#include
+#include
 
 #include "mlx5_vdpa_utils.h"
+#include "mlx5_vdpa.h"
 
-struct mlx5_vdpa_priv {
-	TAILQ_ENTRY(mlx5_vdpa_priv) next;
-	int id; /* vDPA device id. */
-	struct ibv_context *ctx; /* Device context. */
-	struct rte_vdpa_dev_addr dev_addr;
-};
+#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+				    (1ULL << VIRTIO_F_ANY_LAYOUT) | \
+				    (1ULL << VIRTIO_NET_F_MQ) | \
+				    (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
+				    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
+				    (1ULL << VHOST_F_LOG_ALL))
+
+#define MLX5_VDPA_PROTOCOL_FEATURES \
+			    ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			     (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			     (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
+			     (1ULL << VHOST_USER_PROTOCOL_F_MQ))
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
 static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
 int mlx5_vdpa_logtype;
 
+static struct mlx5_vdpa_priv *
+mlx5_vdpa_find_priv_resource_by_did(int did)
+{
+	struct mlx5_vdpa_priv *priv;
+	int found = 0;
+
+	pthread_mutex_lock(&priv_list_lock);
+	TAILQ_FOREACH(priv, &priv_list, next) {
+		if (did == priv->id) {
+			found = 1;
+			break;
+		}
+	}
+	pthread_mutex_unlock(&priv_list_lock);
+	if (!found) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	return priv;
+}
+
+static int
+mlx5_vdpa_get_queue_num(int did, uint32_t *queue_num)
+{
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -1;
+	}
+	*queue_num = priv->caps.max_num_virtio_queues;
+	return 0;
+}
+
+static int
+mlx5_vdpa_get_vdpa_features(int did, uint64_t *features)
+{
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -1;
+	}
+	*features = MLX5_VDPA_DEFAULT_FEATURES;
+	if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
+		*features |= (1ULL << VIRTIO_F_RING_PACKED);
+	if (priv->caps.tso_ipv4)
+		*features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
+	if (priv->caps.tso_ipv6)
+		*features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
+	if (priv->caps.tx_csum)
+		*features |= (1ULL << VIRTIO_NET_F_CSUM);
+	if (priv->caps.rx_csum)
+		*features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
+	if (priv->caps.virtio_version_1_0)
+		*features |= (1ULL << VIRTIO_F_VERSION_1);
+	return 0;
+}
+
+static int
+mlx5_vdpa_get_protocol_features(int did, uint64_t *features)
+{
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -1;
+	}
+	*features = MLX5_VDPA_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static int
+mlx5_vdpa_set_vring_state(int vid, int vring, int state)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	if (vring >= (int)priv->caps.max_num_virtio_queues * 2) {
+		DRV_LOG(ERR, "Too big vring id: %d.", vring);
+		return -E2BIG;
+	}
+	return mlx5_vdpa_virtq_enable(priv, vring, state);
+}
+
+static int
+mlx5_vdpa_direct_db_prepare(struct mlx5_vdpa_priv *priv)
+{
+	int ret;
+
+	if (priv->direct_notifier) {
+		ret = rte_vhost_host_notifier_ctrl(priv->vid, false);
+		if (ret != 0) {
+			DRV_LOG(INFO, "Direct HW notifier FD cannot be "
+				"destroyed for device %d: %d.", priv->vid, ret);
+			return -1;
+		}
+		priv->direct_notifier = 0;
+	}
+	ret = rte_vhost_host_notifier_ctrl(priv->vid, true);
+	if (ret != 0)
+		DRV_LOG(INFO, "Direct HW notifier FD cannot be configured for"
+			" device %d: %d.", priv->vid, ret);
+	else
+		priv->direct_notifier = 1;
+	return 0;
+}
+
+static int
+mlx5_vdpa_features_set(int vid)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+	uint64_t log_base, log_size;
+	uint64_t features;
+	int ret;
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	ret = rte_vhost_get_negotiated_features(vid, &features);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to get negotiated features.");
+		return ret;
+	}
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to get log base.");
+			return ret;
+		}
+		ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to set dirty bitmap.");
+			return ret;
+		}
+		DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
+		ret = mlx5_vdpa_logging_enable(priv, 1);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to enable dirty logging.");
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int
+mlx5_vdpa_dev_close(int vid)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+	int ret = 0;
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -1;
+	}
+	if (priv->configured)
+		ret |= mlx5_vdpa_lm_log(priv);
+	mlx5_vdpa_cqe_event_unset(priv);
+	mlx5_vdpa_steer_unset(priv);
+	mlx5_vdpa_virtqs_release(priv);
+	mlx5_vdpa_event_qp_global_release(priv);
+	mlx5_vdpa_mem_dereg(priv);
+	priv->configured = 0;
+	priv->vid = 0;
+	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
+	return ret;
+}
+
+static int
+mlx5_vdpa_dev_config(int vid)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	if (priv->configured && mlx5_vdpa_dev_close(vid)) {
+		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
+		return -1;
+	}
+	priv->vid = vid;
+	if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_direct_db_prepare(priv) ||
+	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
+	    mlx5_vdpa_cqe_event_setup(priv)) {
+		mlx5_vdpa_dev_close(vid);
+		return -1;
+	}
+	priv->configured = 1;
+	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
+	return 0;
+}
+
+static int
+mlx5_vdpa_get_device_fd(int vid)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	return priv->ctx->cmd_fd;
+}
+
+static int
+mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	RTE_SET_USED(qid);
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	if (!priv->var) {
+		DRV_LOG(ERR, "VAR was not created for device %d, is the device"
+			" configured?", did);
+		return -EINVAL;
+	}
+	*offset = priv->var->mmap_off;
+	*size = priv->var->length;
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
-	.get_queue_num = NULL,
-	.get_features = NULL,
-	.get_protocol_features = NULL,
-	.dev_conf = NULL,
-	.dev_close = NULL,
-	.set_vring_state = NULL,
-	.set_features = NULL,
+	.get_queue_num = mlx5_vdpa_get_queue_num,
+	.get_features = mlx5_vdpa_get_vdpa_features,
+	.get_protocol_features = mlx5_vdpa_get_protocol_features,
+	.dev_conf = mlx5_vdpa_dev_config,
+	.dev_close = mlx5_vdpa_dev_close,
+	.set_vring_state = mlx5_vdpa_set_vring_state,
+	.set_features = mlx5_vdpa_features_set,
 	.migration_done = NULL,
 	.get_vfio_group_fd = NULL,
-	.get_vfio_device_fd = NULL,
-	.get_notify_area = NULL,
+	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
+	.get_notify_area = mlx5_vdpa_get_notify_area,
 };
 
+static struct ibv_device *
+mlx5_vdpa_get_ib_device_match(struct rte_pci_addr *addr)
+{
+	int n;
+	struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
+	struct ibv_device *ibv_match = NULL;
+
+	if (!ibv_list) {
+		rte_errno = ENOSYS;
+		return NULL;
+	}
+	while (n-- > 0) {
+		struct rte_pci_addr pci_addr;
+
+		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
+		if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &pci_addr))
+			continue;
+		if (rte_pci_addr_cmp(addr, &pci_addr))
+			continue;
+		ibv_match = ibv_list[n];
+		break;
+	}
+	if (!ibv_match)
+		rte_errno = ENOENT;
+	mlx5_glue->free_device_list(ibv_list);
+	return ibv_match;
+}
+
+/* Try to disable ROCE by Netlink/Devlink. */
+static int
+mlx5_vdpa_nl_roce_disable(const char *addr)
+{
+	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+	int devlink_id;
+	int enable;
+	int ret;
+
+	if (nlsk_fd < 0)
+		return nlsk_fd;
+	devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
+	if (devlink_id < 0) {
+		ret = devlink_id;
+		DRV_LOG(DEBUG, "Failed to get devlink id for ROCE operations by"
+			" Netlink.");
+		goto close;
+	}
+	ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
+	if (ret) {
+		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+			ret);
+		goto close;
+	} else if (!enable) {
+		DRV_LOG(INFO, "ROCE is already disabled (Netlink).");
+		goto close;
+	}
+	ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
+	if (ret)
+		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
+	else
+		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+close:
+	close(nlsk_fd);
+	return ret;
+}
+
+/* Try to disable ROCE by sysfs. */
+static int
+mlx5_vdpa_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fscanf(file_o, "%d", &enable);
+	if (ret != 1) {
+		rte_errno = EINVAL;
+		ret = EINVAL;
+		goto close;
+	} else if (!enable) {
+		ret = 0;
+		DRV_LOG(INFO, "ROCE is already disabled (sysfs).");
+		goto close;
+	}
+	fclose(file_o);
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	fprintf(file_o, "0\n");
+	ret = 0;
+close:
+	if (ret)
+		DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	else
+		DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	fclose(file_o);
+	return ret;
+}
+
+#define MLX5_VDPA_MAX_RETRIES 20
+#define MLX5_VDPA_USEC 1000
+static int
+mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
+{
+	char addr_name[64] = {0};
+
+	rte_pci_device_name(addr, addr_name, sizeof(addr_name));
+	/* First try to disable ROCE by Netlink, then fall back to sysfs. */
+	if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 ||
+	    mlx5_vdpa_sys_roce_disable(addr_name) == 0) {
+		/*
+		 * ROCE was disabled successfully; wait for the IB device to
+		 * appear again after reload.
+		 */
+		int r;
+		struct ibv_device *ibv_new;
+
+		for (r = MLX5_VDPA_MAX_RETRIES; r; r--) {
+			ibv_new = mlx5_vdpa_get_ib_device_match(addr);
+			if (ibv_new) {
+				*ibv = ibv_new;
+				return 0;
+			}
+			usleep(MLX5_VDPA_USEC);
+		}
+		DRV_LOG(ERR, "Cannot match device %s after ROCE disable, "
+			"retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES);
+		rte_errno = EAGAIN;
+	}
+	return -rte_errno;
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -63,10 +445,10 @@ static int
 mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		    struct rte_pci_device *pci_dev __rte_unused)
 {
-	struct ibv_device **ibv_list;
-	struct ibv_device *ibv_match = NULL;
+	struct ibv_device *ibv;
 	struct mlx5_vdpa_priv *priv = NULL;
 	struct ibv_context *ctx = NULL;
+	struct mlx5_hca_attr attr;
 	int ret;
 
 	if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_VDPA) {
@@ -74,69 +456,76 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 			" driver.");
 		return 1;
 	}
-	errno = 0;
-	ibv_list = mlx5_glue->get_device_list(&ret);
-	if (!ibv_list) {
-		rte_errno = ENOSYS;
-		DRV_LOG(ERR, "Failed to get device list, is ib_uverbs loaded?");
+	ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
+	if (!ibv) {
+		DRV_LOG(ERR, "No matching IB device for PCI slot "
+			PCI_PRI_FMT ".", pci_dev->addr.domain,
+			pci_dev->addr.bus, pci_dev->addr.devid,
+			pci_dev->addr.function);
 		return -rte_errno;
-	}
-	while (ret-- > 0) {
-		struct rte_pci_addr pci_addr;
-
-		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]->name);
-		if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path, &pci_addr))
-			continue;
-		if (pci_dev->addr.domain != pci_addr.domain ||
-		    pci_dev->addr.bus != pci_addr.bus ||
-		    pci_dev->addr.devid != pci_addr.devid ||
-		    pci_dev->addr.function != pci_addr.function)
-			continue;
+	} else {
 		DRV_LOG(INFO, "PCI information matches for device \"%s\".",
-			ibv_list[ret]->name);
-		ibv_match = ibv_list[ret];
-		break;
+			ibv->name);
 	}
-	mlx5_glue->free_device_list(ibv_list);
-	if (!ibv_match) {
-		DRV_LOG(ERR, "No matching IB device for PCI slot "
-			"%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 ".",
-			pci_dev->addr.domain, pci_dev->addr.bus,
-			pci_dev->addr.devid, pci_dev->addr.function);
-		rte_errno = ENOENT;
+	if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
+		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+			ibv->name);
 		return -rte_errno;
 	}
-	ctx = mlx5_glue->dv_open_device(ibv_match);
+	ctx = mlx5_glue->dv_open_device(ibv);
 	if (!ctx) {
-		DRV_LOG(ERR, "Failed to open IB device \"%s\".",
-			ibv_match->name);
+		DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
 		rte_errno = ENODEV;
 		return -rte_errno;
 	}
-	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv),
+	ret = mlx5_devx_cmd_query_hca_attr(ctx, &attr);
+	if (ret) {
+		DRV_LOG(ERR, "Unable to read HCA capabilities.");
+		rte_errno = ENOTSUP;
+		goto error;
+	} else if (!attr.vdpa.valid || !attr.vdpa.max_num_virtio_queues) {
+		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
+			"old FW/OFED version?");
+		rte_errno = ENOTSUP;
+		goto error;
+	}
+	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
+			   sizeof(struct mlx5_vdpa_virtq) *
+			   attr.vdpa.max_num_virtio_queues * 2,
 			   RTE_CACHE_LINE_SIZE);
 	if (!priv) {
 		DRV_LOG(ERR, "Failed to allocate private memory.");
 		rte_errno = ENOMEM;
 		goto error;
 	}
+	priv->caps = attr.vdpa;
+	priv->log_max_rqt_size = attr.log_max_rqt_size;
 	priv->ctx = ctx;
 	priv->dev_addr.pci_addr = pci_dev->addr;
-	priv->dev_addr.type = PCI_ADDR;
+	priv->dev_addr.type = VDPA_ADDR_PCI;
+	priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
+	if (!priv->var) {
+		DRV_LOG(ERR, "Failed to allocate VAR %u.\n", errno);
+		goto error;
+	}
 	priv->id = rte_vdpa_register_device(&priv->dev_addr, &mlx5_vdpa_ops);
 	if (priv->id < 0) {
 		DRV_LOG(ERR, "Failed to register vDPA device.");
 		rte_errno = rte_errno ? rte_errno : EINVAL;
 		goto error;
 	}
+	SLIST_INIT(&priv->mr_list);
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_INSERT_TAIL(&priv_list, priv, next);
 	pthread_mutex_unlock(&priv_list_lock);
 	return 0;
 
 error:
-	if (priv)
+	if (priv) {
+		if (priv->var)
+			mlx5_glue->dv_free_var(priv->var);
 		rte_free(priv);
+	}
 	if (ctx)
 		mlx5_glue->close_device(ctx);
 	return -rte_errno;
@@ -167,24 +556,23 @@ mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
 			break;
 		}
 	}
-	if (found) {
+	if (found)
 		TAILQ_REMOVE(&priv_list, priv, next);
+	pthread_mutex_unlock(&priv_list_lock);
+	if (found) {
+		if (priv->configured)
+			mlx5_vdpa_dev_close(priv->vid);
+		if (priv->var) {
+			mlx5_glue->dv_free_var(priv->var);
+			priv->var = NULL;
+		}
 		mlx5_glue->close_device(priv->ctx);
 		rte_free(priv);
 	}
-	pthread_mutex_unlock(&priv_list_lock);
 	return 0;
 }
 
 static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
-	{
-		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-				PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
-	},
-	{
-		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-				PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
-	},
 	{
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
 				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
@@ -201,6 +589,10 @@ static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
 				PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF)
 	},
+	{
+		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+				PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
+	},
 	{
 		.vendor_id = 0
 	}