X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fcommon%2Fmlx5%2Flinux%2Fmlx5_common_os.c;h=b516564b79465b2a519f4c4e6788a9910542e161;hb=9c777ccfbbb65977d62985f8c00e182d1cdd9f0c;hp=5cf9576921ae9e2e998c3702a42bb0a2dc47eec5;hpb=59df97f1a832a0edfd7f77ffbe5149e553e860b5;p=dpdk.git diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c index 5cf9576921..b516564b79 100644 --- a/drivers/common/mlx5/linux/mlx5_common_os.c +++ b/drivers/common/mlx5/linux/mlx5_common_os.c @@ -2,6 +2,7 @@ * Copyright 2020 Mellanox Technologies, Ltd */ +#include #include #include #include @@ -13,29 +14,23 @@ #include #include +#include +#include #include "mlx5_common.h" -#include "mlx5_common_utils.h" +#include "mlx5_nl.h" +#include "mlx5_common_log.h" +#include "mlx5_common_private.h" +#include "mlx5_common_defs.h" +#include "mlx5_common_os.h" #include "mlx5_glue.h" #ifdef MLX5_GLUE const struct mlx5_glue *mlx5_glue; #endif -/** - * Get PCI information by sysfs device path. - * - * @param dev_path - * Pointer to device sysfs folder name. - * @param[out] pci_addr - * PCI bus address output buffer. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ int -mlx5_dev_to_pci_addr(const char *dev_path, - struct rte_pci_addr *pci_addr) +mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) { FILE *file; char line[32]; @@ -158,17 +153,6 @@ mlx5_translate_port_name(const char *port_name_in, port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; } -/** - * Get kernel interface name from IB device path. - * - * @param[in] ibdev_path - * Pointer to IB device path. - * @param[out] ifname - * Interface name output buffer. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname) { @@ -423,3 +407,340 @@ glue_error: mlx5_glue = NULL; } +/** + * Allocate Protection Domain object and extract its pdn using DV API. + * + * @param[out] cdev + * Pointer to the mlx5 device. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_os_pd_create(struct mlx5_common_device *cdev) +{ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + struct mlx5dv_obj obj; + struct mlx5dv_pd pd_info; + int ret; +#endif + + cdev->pd = mlx5_glue->alloc_pd(cdev->ctx); + if (cdev->pd == NULL) { + DRV_LOG(ERR, "Failed to allocate PD."); + return errno ? -errno : -ENOMEM; + } + if (cdev->config.devx == 0) + return 0; +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + obj.pd.in = cdev->pd; + obj.pd.out = &pd_info; + ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); + if (ret != 0) { + DRV_LOG(ERR, "Fail to get PD object info."); + mlx5_glue->dealloc_pd(cdev->pd); + cdev->pd = NULL; + return -errno; + } + cdev->pdn = pd_info.pdn; + return 0; +#else + DRV_LOG(ERR, "Cannot get pdn - no DV support."); + return -ENOTSUP; +#endif /* HAVE_IBV_FLOW_DV_SUPPORT */ +} + +static struct ibv_device * +mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) +{ + int n; + struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n); + struct ibv_device *ibv_match = NULL; + + if (ibv_list == NULL) { + rte_errno = ENOSYS; + return NULL; + } + while (n-- > 0) { + struct rte_pci_addr paddr; + + DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name); + if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0) + continue; + if (rte_pci_addr_cmp(addr, &paddr) != 0) + continue; + ibv_match = ibv_list[n]; + break; + } + if (ibv_match == NULL) { + DRV_LOG(WARNING, + "No Verbs device matches PCI device " PCI_PRI_FMT "," + " are kernel drivers loaded?", + addr->domain, addr->bus, addr->devid, addr->function); + rte_errno = ENOENT; + } + mlx5_glue->free_device_list(ibv_list); + return ibv_match; +} + +/* Try to disable ROCE by Netlink\Devlink. */ +static int +mlx5_nl_roce_disable(const char *addr) +{ + int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); + int devlink_id; + int enable; + int ret; + + if (nlsk_fd < 0) + return nlsk_fd; + devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd); + if (devlink_id < 0) { + ret = devlink_id; + DRV_LOG(DEBUG, + "Failed to get devlink id for ROCE operations by Netlink."); + goto close; + } + ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); + if (ret) { + DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", + ret); + goto close; + } else if (!enable) { + DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); + goto close; + } + ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); + if (ret) + DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); + else + DRV_LOG(INFO, "ROCE is disabled by Netlink successfully."); +close: + close(nlsk_fd); + return ret; +} + +/* Try to disable ROCE by sysfs. */ +static int +mlx5_sys_roce_disable(const char *addr) +{ + FILE *file_o; + int enable; + int ret; + + MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); + file_o = fopen(file_p, "rb"); + if (!file_o) { + rte_errno = ENOTSUP; + return -ENOTSUP; + } + ret = fscanf(file_o, "%d", &enable); + if (ret != 1) { + rte_errno = EINVAL; + ret = EINVAL; + goto close; + } else if (!enable) { + ret = 0; + DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); + goto close; + } + fclose(file_o); + file_o = fopen(file_p, "wb"); + if (!file_o) { + rte_errno = ENOTSUP; + return -ENOTSUP; + } + fprintf(file_o, "0\n"); + ret = 0; +close: + if (ret) + DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); + else + DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); + fclose(file_o); + return ret; +} + +static int +mlx5_roce_disable(const struct rte_device *dev) +{ + char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; + + if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) + return -rte_errno; + /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */ + if (mlx5_nl_roce_disable(pci_addr) != 0 && + mlx5_sys_roce_disable(pci_addr) != 0) + return -rte_errno; + return 0; +} + +static struct ibv_device * +mlx5_os_get_ibv_dev(const struct rte_device *dev) +{ + struct ibv_device *ibv; + + if (mlx5_dev_is_pci(dev)) + ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr); + else + ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev)); + if (ibv == NULL) { + rte_errno = ENODEV; + DRV_LOG(ERR, "Verbs device not found: %s", dev->name); + } + return ibv; +} + +static struct ibv_device * +mlx5_vdpa_get_ibv_dev(const struct rte_device *dev) +{ + struct ibv_device *ibv; + int retry; + + if (mlx5_roce_disable(dev) != 0) { + DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", + dev->name); + return NULL; + } + /* Wait for the IB device to appear again after reload. */ + for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) { + ibv = mlx5_os_get_ibv_dev(dev); + if (ibv != NULL) + return ibv; + usleep(MLX5_VDPA_USEC); + } + DRV_LOG(ERR, + "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.", + dev->name, MLX5_VDPA_MAX_RETRIES); + rte_errno = EAGAIN; + return NULL; +} + +static int +mlx5_config_doorbell_mapping_env(int dbnc) +{ + char *env; + int value; + + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); + /* Get environment variable to store. */ + env = getenv(MLX5_SHUT_UP_BF); + value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; + if (dbnc == MLX5_ARG_UNSET) + setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); + else + setenv(MLX5_SHUT_UP_BF, + dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1); + return value; +} + +static void +mlx5_restore_doorbell_mapping_env(int value) +{ + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); + /* Restore the original environment variable state. */ + if (value == MLX5_ARG_UNSET) + unsetenv(MLX5_SHUT_UP_BF); + else + setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); +} + +/** + * Function API to open IB device. + * + * + * @param cdev + * Pointer to the mlx5 device. + * @param classes + * Chosen classes come from device arguments. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes) +{ + struct ibv_device *ibv; + struct ibv_context *ctx = NULL; + int dbmap_env; + + if (classes & MLX5_CLASS_VDPA) + ibv = mlx5_vdpa_get_ibv_dev(cdev->dev); + else + ibv = mlx5_os_get_ibv_dev(cdev->dev); + if (!ibv) + return -rte_errno; + DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); + /* + * Configure environment variable "MLX5_BF_SHUT_UP" before the device + * creation. The rdma_core library checks the variable at device + * creation and stores the result internally. + */ + dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc); + /* Try to open IB device with DV first, then usual Verbs. */ + errno = 0; + ctx = mlx5_glue->dv_open_device(ibv); + if (ctx) { + cdev->config.devx = 1; + DRV_LOG(DEBUG, "DevX is supported."); + } else if (classes == MLX5_CLASS_ETH) { + /* The environment variable is still configured. */ + ctx = mlx5_glue->open_device(ibv); + if (ctx == NULL) + goto error; + DRV_LOG(DEBUG, "DevX is NOT supported."); + } else { + goto error; + } + /* The device is created, no need for environment. */ + mlx5_restore_doorbell_mapping_env(dbmap_env); + /* Hint libmlx5 to use PMD allocator for data plane resources */ + mlx5_set_context_attr(cdev->dev, ctx); + cdev->ctx = ctx; + return 0; +error: + rte_errno = errno ? errno : ENODEV; + /* The device creation is failed, no need for environment. */ + mlx5_restore_doorbell_mapping_env(dbmap_env); + DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); + return -rte_errno; +} +int +mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len) +{ + char tmp[512]; + char cur_ifname[IF_NAMESIZE + 1]; + FILE *id_file; + DIR *dir; + struct dirent *ptr; + int ret; + + if (guid == NULL || len < sizeof(u_int64_t) + 1) + return -1; + memset(guid, 0, len); + snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net", + dev->domain, dev->bus, dev->devid, dev->function); + dir = opendir(tmp); + if (dir == NULL) + return -1; + /* Traverse to identify PF interface */ + do { + ptr = readdir(dir); + if (ptr == NULL || ptr->d_type != DT_DIR) { + closedir(dir); + return -1; + } + } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') || + strchr(ptr->d_name, 'v')); + snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name); + closedir(dir); + snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp), + "/%s/phys_switch_id", cur_ifname); + /* Older OFED like 5.3 doesn't support read */ + id_file = fopen(tmp, "r"); + if (!id_file) + return 0; + ret = fscanf(id_file, "%16s", guid); + fclose(id_file); + return ret; +}