common/mlx5: introduce user index field in completion
[dpdk.git] / drivers / common / mlx5 / linux / mlx5_common_os.c
index 5cf9576..b516564 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright 2020 Mellanox Technologies, Ltd
  */
 
+#include <sys/types.h>
 #include <unistd.h>
 #include <string.h>
 #include <stdio.h>
 
 #include <rte_errno.h>
 #include <rte_string_fns.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_auxiliary.h>
 
 #include "mlx5_common.h"
-#include "mlx5_common_utils.h"
+#include "mlx5_nl.h"
+#include "mlx5_common_log.h"
+#include "mlx5_common_private.h"
+#include "mlx5_common_defs.h"
+#include "mlx5_common_os.h"
 #include "mlx5_glue.h"
 
 #ifdef MLX5_GLUE
 const struct mlx5_glue *mlx5_glue;
 #endif
 
-/**
- * Get PCI information by sysfs device path.
- *
- * @param dev_path
- *   Pointer to device sysfs folder name.
- * @param[out] pci_addr
- *   PCI bus address output buffer.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
 int
-mlx5_dev_to_pci_addr(const char *dev_path,
-                    struct rte_pci_addr *pci_addr)
+mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
 {
        FILE *file;
        char line[32];
@@ -158,17 +153,6 @@ mlx5_translate_port_name(const char *port_name_in,
        port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
 }
 
-/**
- * Get kernel interface name from IB device path.
- *
- * @param[in] ibdev_path
- *   Pointer to IB device path.
- * @param[out] ifname
- *   Interface name output buffer.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
 int
 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
 {
@@ -423,3 +407,340 @@ glue_error:
        mlx5_glue = NULL;
 }
 
+/**
+ * Allocate Protection Domain object and extract its pdn using DV API.
+ *
+ * @param[out] cdev
+ *   Pointer to the mlx5 device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_pd_create(struct mlx5_common_device *cdev)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       struct mlx5dv_obj obj;
+       struct mlx5dv_pd pd_info;
+       int ret;
+#endif
+
+       cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
+       if (cdev->pd == NULL) {
+               DRV_LOG(ERR, "Failed to allocate PD.");
+               return errno ? -errno : -ENOMEM;
+       }
+       if (cdev->config.devx == 0)
+               return 0;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       obj.pd.in = cdev->pd;
+       obj.pd.out = &pd_info;
+       ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
+       if (ret != 0) {
+               DRV_LOG(ERR, "Fail to get PD object info.");
+               mlx5_glue->dealloc_pd(cdev->pd);
+               cdev->pd = NULL;
+               return -errno;
+       }
+       cdev->pdn = pd_info.pdn;
+       return 0;
+#else
+       DRV_LOG(ERR, "Cannot get pdn - no DV support.");
+       return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+}
+
+static struct ibv_device *
+mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
+{
+       int n;
+       struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
+       struct ibv_device *ibv_match = NULL;
+
+       if (ibv_list == NULL) {
+               rte_errno = ENOSYS;
+               return NULL;
+       }
+       while (n-- > 0) {
+               struct rte_pci_addr paddr;
+
+               DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
+               if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
+                       continue;
+               if (rte_pci_addr_cmp(addr, &paddr) != 0)
+                       continue;
+               ibv_match = ibv_list[n];
+               break;
+       }
+       if (ibv_match == NULL) {
+               DRV_LOG(WARNING,
+                       "No Verbs device matches PCI device " PCI_PRI_FMT ","
+                       " are kernel drivers loaded?",
+                       addr->domain, addr->bus, addr->devid, addr->function);
+               rte_errno = ENOENT;
+       }
+       mlx5_glue->free_device_list(ibv_list);
+       return ibv_match;
+}
+
+/* Try to disable ROCE by Netlink\Devlink. */
+static int
+mlx5_nl_roce_disable(const char *addr)
+{
+       int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+       int devlink_id;
+       int enable;
+       int ret;
+
+       if (nlsk_fd < 0)
+               return nlsk_fd;
+       devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
+       if (devlink_id < 0) {
+               ret = devlink_id;
+               DRV_LOG(DEBUG,
+                       "Failed to get devlink id for ROCE operations by Netlink.");
+               goto close;
+       }
+       ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
+       if (ret) {
+               DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+                       ret);
+               goto close;
+       } else if (!enable) {
+               DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+               goto close;
+       }
+       ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
+       if (ret)
+               DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
+       else
+               DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+close:
+       close(nlsk_fd);
+       return ret;
+}
+
+/* Try to disable ROCE by sysfs. */
+static int
+mlx5_sys_roce_disable(const char *addr)
+{
+       FILE *file_o;
+       int enable;
+       int ret;
+
+       MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+       file_o = fopen(file_p, "rb");
+       if (!file_o) {
+               rte_errno = ENOTSUP;
+               return -ENOTSUP;
+       }
+       ret = fscanf(file_o, "%d", &enable);
+       if (ret != 1) {
+               rte_errno = EINVAL;
+               ret = EINVAL;
+               goto close;
+       } else if (!enable) {
+               ret = 0;
+               DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+               goto close;
+       }
+       fclose(file_o);
+       file_o = fopen(file_p, "wb");
+       if (!file_o) {
+               rte_errno = ENOTSUP;
+               return -ENOTSUP;
+       }
+       fprintf(file_o, "0\n");
+       ret = 0;
+close:
+       if (ret)
+               DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+       else
+               DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+       fclose(file_o);
+       return ret;
+}
+
+static int
+mlx5_roce_disable(const struct rte_device *dev)
+{
+       char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
+
+       if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
+               return -rte_errno;
+       /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
+       if (mlx5_nl_roce_disable(pci_addr) != 0 &&
+           mlx5_sys_roce_disable(pci_addr) != 0)
+               return -rte_errno;
+       return 0;
+}
+
+static struct ibv_device *
+mlx5_os_get_ibv_dev(const struct rte_device *dev)
+{
+       struct ibv_device *ibv;
+
+       if (mlx5_dev_is_pci(dev))
+               ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr);
+       else
+               ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev));
+       if (ibv == NULL) {
+               rte_errno = ENODEV;
+               DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
+       }
+       return ibv;
+}
+
+static struct ibv_device *
+mlx5_vdpa_get_ibv_dev(const struct rte_device *dev)
+{
+       struct ibv_device *ibv;
+       int retry;
+
+       if (mlx5_roce_disable(dev) != 0) {
+               DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+                       dev->name);
+               return NULL;
+       }
+       /* Wait for the IB device to appear again after reload. */
+       for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
+               ibv = mlx5_os_get_ibv_dev(dev);
+               if (ibv != NULL)
+                       return ibv;
+               usleep(MLX5_VDPA_USEC);
+       }
+       DRV_LOG(ERR,
+               "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
+               dev->name, MLX5_VDPA_MAX_RETRIES);
+       rte_errno = EAGAIN;
+       return NULL;
+}
+
+static int
+mlx5_config_doorbell_mapping_env(int dbnc)
+{
+       char *env;
+       int value;
+
+       MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+       /* Get environment variable to store. */
+       env = getenv(MLX5_SHUT_UP_BF);
+       value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
+       if (dbnc == MLX5_ARG_UNSET)
+               setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
+       else
+               setenv(MLX5_SHUT_UP_BF,
+                      dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
+       return value;
+}
+
+static void
+mlx5_restore_doorbell_mapping_env(int value)
+{
+       MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+       /* Restore the original environment variable state. */
+       if (value == MLX5_ARG_UNSET)
+               unsetenv(MLX5_SHUT_UP_BF);
+       else
+               setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
+}
+
+/**
+ * Function API to open IB device.
+ *
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device.
+ * @param classes
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
+{
+       struct ibv_device *ibv;
+       struct ibv_context *ctx = NULL;
+       int dbmap_env;
+
+       if (classes & MLX5_CLASS_VDPA)
+               ibv = mlx5_vdpa_get_ibv_dev(cdev->dev);
+       else
+               ibv = mlx5_os_get_ibv_dev(cdev->dev);
+       if (!ibv)
+               return -rte_errno;
+       DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
+       /*
+        * Configure environment variable "MLX5_BF_SHUT_UP" before the device
+        * creation. The rdma_core library checks the variable at device
+        * creation and stores the result internally.
+        */
+       dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
+       /* Try to open IB device with DV first, then usual Verbs. */
+       errno = 0;
+       ctx = mlx5_glue->dv_open_device(ibv);
+       if (ctx) {
+               cdev->config.devx = 1;
+               DRV_LOG(DEBUG, "DevX is supported.");
+       } else if (classes == MLX5_CLASS_ETH) {
+               /* The environment variable is still configured. */
+               ctx = mlx5_glue->open_device(ibv);
+               if (ctx == NULL)
+                       goto error;
+               DRV_LOG(DEBUG, "DevX is NOT supported.");
+       } else {
+               goto error;
+       }
+       /* The device is created, no need for environment. */
+       mlx5_restore_doorbell_mapping_env(dbmap_env);
+       /* Hint libmlx5 to use PMD allocator for data plane resources */
+       mlx5_set_context_attr(cdev->dev, ctx);
+       cdev->ctx = ctx;
+       return 0;
+error:
+       rte_errno = errno ? errno : ENODEV;
+       /* The device creation is failed, no need for environment. */
+       mlx5_restore_doorbell_mapping_env(dbmap_env);
+       DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
+       return -rte_errno;
+}
+int
+mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
+{
+       char tmp[512];
+       char cur_ifname[IF_NAMESIZE + 1];
+       FILE *id_file;
+       DIR *dir;
+       struct dirent *ptr;
+       int ret;
+
+       if (guid == NULL || len < sizeof(u_int64_t) + 1)
+               return -1;
+       memset(guid, 0, len);
+       snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net",
+                       dev->domain, dev->bus, dev->devid, dev->function);
+       dir = opendir(tmp);
+       if (dir == NULL)
+               return -1;
+       /* Traverse to identify PF interface */
+       do {
+               ptr = readdir(dir);
+               if (ptr == NULL || ptr->d_type != DT_DIR) {
+                       closedir(dir);
+                       return -1;
+               }
+       } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') ||
+                strchr(ptr->d_name, 'v'));
+       snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name);
+       closedir(dir);
+       snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp),
+                       "/%s/phys_switch_id", cur_ifname);
+       /* Older OFED like 5.3 doesn't support read */
+       id_file = fopen(tmp, "r");
+       if (!id_file)
+               return 0;
+       ret = fscanf(id_file, "%16s", guid);
+       fclose(id_file);
+       return ret;
+}