* Copyright 2020 Mellanox Technologies, Ltd
*/
+#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_auxiliary.h>
#include "mlx5_common.h"
-#include "mlx5_common_utils.h"
+#include "mlx5_nl.h"
+#include "mlx5_common_log.h"
+#include "mlx5_common_private.h"
+#include "mlx5_common_defs.h"
+#include "mlx5_common_os.h"
#include "mlx5_glue.h"
#ifdef MLX5_GLUE
const struct mlx5_glue *mlx5_glue;
#endif
-/**
- * Get PCI information by sysfs device path.
- *
- * @param dev_path
- * Pointer to device sysfs folder name.
- * @param[out] pci_addr
- * PCI bus address output buffer.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
int
-mlx5_dev_to_pci_addr(const char *dev_path,
- struct rte_pci_addr *pci_addr)
+mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
{
FILE *file;
char line[32];
port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
}
-/**
- * Get kernel interface name from IB device path.
- *
- * @param[in] ibdev_path
- * Pointer to IB device path.
- * @param[out] ifname
- * Interface name output buffer.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
int
mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
{
mlx5_glue = NULL;
}
+/**
+ * Allocate Protection Domain object and extract its pdn using DV API.
+ *
+ * On success cdev->pd holds the allocated PD. When cdev->config.devx is
+ * non-zero, cdev->pdn is additionally filled from the DV object info; if that
+ * query fails the PD is released again and cdev->pd is reset to NULL.
+ *
+ * @param[out] cdev
+ *   Pointer to the mlx5 device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_pd_create(struct mlx5_common_device *cdev)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5dv_obj obj;
+	struct mlx5dv_pd pd_info;
+	int ret;
+#endif
+
+	cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
+	if (cdev->pd == NULL) {
+		/* Capture errno first, later calls may overwrite it. */
+		rte_errno = errno ? errno : ENOMEM;
+		DRV_LOG(ERR, "Failed to allocate PD.");
+		return -rte_errno;
+	}
+	if (cdev->config.devx == 0)
+		return 0;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	obj.pd.in = cdev->pd;
+	obj.pd.out = &pd_info;
+	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
+	if (ret != 0) {
+		/*
+		 * Save the failure reason before logging and cleanup can
+		 * overwrite errno, and never report success (0) on this path.
+		 */
+		int err = errno ? errno : EINVAL;
+
+		DRV_LOG(ERR, "Fail to get PD object info.");
+		mlx5_glue->dealloc_pd(cdev->pd);
+		cdev->pd = NULL;
+		rte_errno = err;
+		return -err;
+	}
+	cdev->pdn = pd_info.pdn;
+	return 0;
+#else
+	DRV_LOG(ERR, "Cannot get pdn - no DV support.");
+	rte_errno = ENOTSUP;
+	return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+}
+
+/**
+ * Look up the Verbs device whose sysfs PCI address equals the given one.
+ *
+ * The device list obtained from the glue layer is scanned from its last
+ * entry towards the first; the first match ends the scan.
+ *
+ * NOTE(review): the returned pointer is an element of a list that has already
+ * been passed to free_device_list() - confirm against the rdma-core contract
+ * that it is still usable by the caller.
+ *
+ * @param[in] addr
+ *   PCI address to match.
+ *
+ * @return
+ *   Matching device on success, NULL otherwise and rte_errno is set
+ *   (ENOSYS when no device list is available, ENOENT when nothing matches).
+ */
+static struct ibv_device *
+mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
+{
+	struct ibv_device **devs;
+	struct ibv_device *found = NULL;
+	int count;
+	int idx;
+
+	devs = mlx5_glue->get_device_list(&count);
+	if (devs == NULL) {
+		rte_errno = ENOSYS;
+		return NULL;
+	}
+	for (idx = count - 1; idx >= 0 && found == NULL; idx--) {
+		struct rte_pci_addr cur;
+
+		DRV_LOG(DEBUG, "Checking device \"%s\"..", devs[idx]->name);
+		/* Entries whose PCI address cannot be read are skipped. */
+		if (mlx5_get_pci_addr(devs[idx]->ibdev_path, &cur) == 0 &&
+		    rte_pci_addr_cmp(addr, &cur) == 0)
+			found = devs[idx];
+	}
+	if (found == NULL) {
+		DRV_LOG(WARNING,
+			"No Verbs device matches PCI device " PCI_PRI_FMT ","
+			" are kernel drivers loaded?",
+			addr->domain, addr->bus, addr->devid, addr->function);
+		rte_errno = ENOENT;
+	}
+	mlx5_glue->free_device_list(devs);
+	return found;
+}
+
+/*
+ * Try to disable RoCE through the devlink generic Netlink family.
+ *
+ * Returns 0 when RoCE is disabled (or was already disabled); otherwise the
+ * non-zero value reported by the failing Netlink helper.
+ */
+static int
+mlx5_nl_roce_disable(const char *addr)
+{
+	int fd;
+	int family;
+	int roce_on;
+	int rc;
+
+	fd = mlx5_nl_init(NETLINK_GENERIC);
+	if (fd < 0)
+		return fd;
+	family = mlx5_nl_devlink_family_id_get(fd);
+	if (family < 0) {
+		DRV_LOG(DEBUG,
+			"Failed to get devlink id for ROCE operations by Netlink.");
+		rc = family;
+		goto out;
+	}
+	rc = mlx5_nl_enable_roce_get(fd, family, addr, &roce_on);
+	if (rc != 0) {
+		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+			rc);
+		goto out;
+	}
+	if (roce_on == 0) {
+		/* Nothing to change, rc is already 0 here. */
+		DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+		goto out;
+	}
+	rc = mlx5_nl_enable_roce_set(fd, family, addr, 0);
+	if (rc == 0)
+		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+	else
+		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", rc);
+out:
+	/* The Netlink socket is only needed for the duration of this call. */
+	close(fd);
+	return rc;
+}
+
+/*
+ * Try to disable ROCE by sysfs.
+ *
+ * Reads /sys/bus/pci/devices/<addr>/roce_enable and, when the value is
+ * non-zero, writes "0" back to the same file.
+ *
+ * Returns 0 when ROCE is disabled (or was already disabled), a negative errno
+ * value otherwise and rte_errno is set.
+ */
+static int
+mlx5_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fscanf(file_o, "%d", &enable);
+	/* The read handle is not needed any more on any path. */
+	fclose(file_o);
+	if (ret != 1) {
+		ret = -EINVAL;
+		goto done;
+	}
+	if (!enable) {
+		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+		return 0;
+	}
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fprintf(file_o, "0\n") < 0 ? -EIO : 0;
+	/*
+	 * stdio buffers the write, so a failure to store the value may only
+	 * be reported when the stream is flushed by fclose().
+	 */
+	if (fclose(file_o) != 0 && ret == 0)
+		ret = errno ? -errno : -EIO;
+done:
+	if (ret) {
+		rte_errno = -ret;
+		DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	} else {
+		DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	}
+	return ret;
+}
+
+/*
+ * Disable RoCE on the PCI function behind the given device.
+ *
+ * Netlink is tried first; sysfs is used only if the Netlink attempt fails.
+ * Returns 0 on success, -rte_errno otherwise.
+ */
+static int
+mlx5_roce_disable(const struct rte_device *dev)
+{
+	char bdf[PCI_PRI_STR_SIZE] = { 0 };
+
+	if (mlx5_dev_to_pci_str(dev, bdf, sizeof(bdf)) < 0)
+		return -rte_errno;
+	if (mlx5_nl_roce_disable(bdf) == 0)
+		return 0;
+	/* Netlink did not work, fall back to sysfs. */
+	if (mlx5_sys_roce_disable(bdf) == 0)
+		return 0;
+	return -rte_errno;
+}
+
+/*
+ * Resolve the Verbs device for a generic rte_device.
+ *
+ * PCI devices are matched by PCI address, any other device goes through the
+ * auxiliary bus lookup. Returns NULL with rte_errno set to ENODEV when no
+ * Verbs device is found.
+ */
+static struct ibv_device *
+mlx5_os_get_ibv_dev(const struct rte_device *dev)
+{
+	struct ibv_device *ibv = mlx5_dev_is_pci(dev) ?
+		mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr) :
+		mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev));
+
+	if (ibv != NULL)
+		return ibv;
+	rte_errno = ENODEV;
+	DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
+	return NULL;
+}
+
+/*
+ * Get the Verbs device for the vDPA class.
+ *
+ * RoCE is disabled first; the Verbs device lookup is then repeated up to
+ * MLX5_VDPA_MAX_RETRIES times, sleeping MLX5_VDPA_USEC microseconds after
+ * every failed attempt. Returns NULL on failure; rte_errno is EAGAIN when
+ * all attempts were used up.
+ */
+static struct ibv_device *
+mlx5_vdpa_get_ibv_dev(const struct rte_device *dev)
+{
+	int attempts_left = MLX5_VDPA_MAX_RETRIES;
+
+	if (mlx5_roce_disable(dev) != 0) {
+		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+			dev->name);
+		return NULL;
+	}
+	/* Wait for the IB device to appear again after reload. */
+	while (attempts_left > 0) {
+		struct ibv_device *ibv = mlx5_os_get_ibv_dev(dev);
+
+		if (ibv != NULL)
+			return ibv;
+		usleep(MLX5_VDPA_USEC);
+		attempts_left--;
+	}
+	DRV_LOG(ERR,
+		"Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
+		dev->name, MLX5_VDPA_MAX_RETRIES);
+	rte_errno = EAGAIN;
+	return NULL;
+}
+
+/*
+ * Set the MLX5_SHUT_UP_BF environment variable according to dbnc.
+ *
+ * Returns the state the variable had before the call so that it can be
+ * restored later: MLX5_ARG_UNSET when it was not defined, 0 when it was
+ * exactly "0", 1 for any other value.
+ */
+static int
+mlx5_config_doorbell_mapping_env(int dbnc)
+{
+	const char *current;
+	const char *wanted;
+	int previous;
+
+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	/* The old value must be evaluated before it gets overwritten. */
+	current = getenv(MLX5_SHUT_UP_BF);
+	if (current == NULL)
+		previous = MLX5_ARG_UNSET;
+	else
+		previous = strcmp(current, "0") != 0;
+	if (dbnc == MLX5_ARG_UNSET)
+		wanted = MLX5_SHUT_UP_BF_DEFAULT;
+	else if (dbnc == MLX5_TXDB_NCACHED)
+		wanted = "1";
+	else
+		wanted = "0";
+	setenv(MLX5_SHUT_UP_BF, wanted, 1);
+	return previous;
+}
+
+/*
+ * Put the MLX5_SHUT_UP_BF environment variable back to a saved state.
+ *
+ * MLX5_ARG_UNSET removes the variable; any other value re-creates it as
+ * "1" (non-zero) or "0" (zero).
+ */
+static void
+mlx5_restore_doorbell_mapping_env(int value)
+{
+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	if (value != MLX5_ARG_UNSET) {
+		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
+		return;
+	}
+	/* The variable did not exist originally, so remove it again. */
+	unsetenv(MLX5_SHUT_UP_BF);
+}
+
+/**
+ * Function API to open IB device.
+ *
+ * The Verbs device is looked up first (through the vDPA specific path when
+ * the vDPA class is requested), then opened with the DV API. Only when the
+ * requested classes are exactly MLX5_CLASS_ETH is a plain Verbs open tried
+ * as a fallback. On success the context is stored in cdev->ctx.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device.
+ * @param classes
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
+{
+	struct ibv_context *ctx;
+	struct ibv_device *ibv;
+	int saved_env;
+
+	ibv = (classes & MLX5_CLASS_VDPA) ?
+	      mlx5_vdpa_get_ibv_dev(cdev->dev) :
+	      mlx5_os_get_ibv_dev(cdev->dev);
+	if (ibv == NULL)
+		return -rte_errno;
+	DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
+	/*
+	 * The environment variable named by MLX5_SHUT_UP_BF has to be set up
+	 * before the device is created: the rdma_core library checks it at
+	 * device creation and stores the result internally.
+	 */
+	saved_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
+	/* DV open goes first, usual Verbs open is the fallback. */
+	errno = 0;
+	ctx = mlx5_glue->dv_open_device(ibv);
+	if (ctx != NULL) {
+		cdev->config.devx = 1;
+		DRV_LOG(DEBUG, "DevX is supported.");
+	} else if (classes == MLX5_CLASS_ETH) {
+		/* The environment variable is still configured. */
+		ctx = mlx5_glue->open_device(ibv);
+		if (ctx != NULL)
+			DRV_LOG(DEBUG, "DevX is NOT supported.");
+	}
+	if (ctx == NULL) {
+		rte_errno = errno ? errno : ENODEV;
+		/* Creation failed, the environment is not needed any more. */
+		mlx5_restore_doorbell_mapping_env(saved_env);
+		DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
+		return -rte_errno;
+	}
+	/* The device exists now, the environment is not needed any more. */
+	mlx5_restore_doorbell_mapping_env(saved_env);
+	/* Hint libmlx5 to use PMD allocator for data plane resources */
+	mlx5_set_context_attr(cdev->dev, ctx);
+	cdev->ctx = ctx;
+	return 0;
+}
+/**
+ * Read the phys_switch_id string of the PF network interface of a PCI device.
+ *
+ * The interface is the first entry under the device's sysfs "net" directory
+ * whose name contains none of '.', '_' or 'v'. At most 16 characters of the
+ * identifier are read; the result is stored NUL-terminated in guid and is
+ * truncated to len - 1 characters when the buffer is shorter than that.
+ *
+ * @param[in] dev
+ *   PCI address of the device.
+ * @param[out] guid
+ *   Output buffer, zeroed before use.
+ * @param[in] len
+ *   Size of the output buffer in bytes, at least sizeof(u_int64_t) + 1.
+ *
+ * @return
+ *   -1 on invalid arguments or when no suitable interface is found, 0 when
+ *   the phys_switch_id file cannot be opened, the fscanf() result otherwise
+ *   (1 when an identifier was read).
+ */
+int
+mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
+{
+	char tmp[512];
+	char cur_ifname[IF_NAMESIZE + 1];
+	/* Local target for "%16s": 16 characters plus terminating NUL. */
+	char id[16 + 1] = { 0 };
+	FILE *id_file;
+	DIR *dir;
+	struct dirent *ptr;
+	int ret;
+
+	if (guid == NULL || len < sizeof(u_int64_t) + 1)
+		return -1;
+	memset(guid, 0, len);
+	snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net",
+		 dev->domain, dev->bus, dev->devid, dev->function);
+	dir = opendir(tmp);
+	if (dir == NULL)
+		return -1;
+	/* Traverse to identify PF interface */
+	do {
+		ptr = readdir(dir);
+		if (ptr == NULL || ptr->d_type != DT_DIR) {
+			closedir(dir);
+			return -1;
+		}
+	} while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') ||
+		 strchr(ptr->d_name, 'v'));
+	snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name);
+	closedir(dir);
+	snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp),
+		 "/%s/phys_switch_id", cur_ifname);
+	/* Older OFED like 5.3 doesn't support read */
+	id_file = fopen(tmp, "r");
+	if (!id_file)
+		return 0;
+	ret = fscanf(id_file, "%16s", id);
+	fclose(id_file);
+	if (ret == 1) {
+		size_t n = strlen(id);
+
+		/*
+		 * Never write past the caller's buffer; it was zeroed above,
+		 * so copying at most len - 1 bytes keeps it NUL-terminated.
+		 */
+		if (n > len - 1)
+			n = len - 1;
+		memcpy(guid, id, n);
+	}
+	return ret;
+}