common/cnxk: support bandwidth profile configure
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
index 1f92c52..6d17d7a 100644 (file)
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_errno.h>
-#include <rte_pci.h>
 #include <rte_string_fns.h>
+#include <rte_bus_pci.h>
 
 #include <mlx5_glue.h>
 #include <mlx5_common.h>
-#include <mlx5_common_pci.h>
 #include <mlx5_devx_cmds.h>
 #include <mlx5_prm.h>
 #include <mlx5_nl.h>
@@ -24,6 +23,7 @@
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
+#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
 
 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
                            (1ULL << VIRTIO_F_ANY_LAYOUT) | \
@@ -44,7 +44,7 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
-#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
                                              TAILQ_HEAD_INITIALIZER(priv_list);
@@ -295,6 +295,8 @@ mlx5_vdpa_dev_close(int vid)
        }
        priv->configured = 0;
        priv->vid = 0;
+       /* The mutex may stay locked after event thread cancel - initiate it. */
+       pthread_mutex_init(&priv->vq_config_lock, NULL);
        DRV_LOG(INFO, "vDPA device %d was closed.", vid);
        return ret;
 }
@@ -470,34 +472,6 @@ static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
        .reset_stats = mlx5_vdpa_reset_stats,
 };
 
-static struct ibv_device *
-mlx5_vdpa_get_ib_device_match(struct rte_pci_addr *addr)
-{
-       int n;
-       struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
-       struct ibv_device *ibv_match = NULL;
-
-       if (!ibv_list) {
-               rte_errno = ENOSYS;
-               return NULL;
-       }
-       while (n-- > 0) {
-               struct rte_pci_addr pci_addr;
-
-               DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
-               if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &pci_addr))
-                       continue;
-               if (rte_pci_addr_cmp(addr, &pci_addr))
-                       continue;
-               ibv_match = ibv_list[n];
-               break;
-       }
-       if (!ibv_match)
-               rte_errno = ENOENT;
-       mlx5_glue->free_device_list(ibv_list);
-       return ibv_match;
-}
-
 /* Try to disable ROCE by Netlink\Devlink. */
 static int
 mlx5_vdpa_nl_roce_disable(const char *addr)
@@ -577,34 +551,17 @@ close:
 }
 
 static int
-mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
+mlx5_vdpa_roce_disable(struct rte_device *dev)
 {
-       char addr_name[64] = {0};
+       char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
 
-       rte_pci_device_name(addr, addr_name, sizeof(addr_name));
+       if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
+               return -rte_errno;
        /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
-       if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 ||
-           mlx5_vdpa_sys_roce_disable(addr_name) == 0) {
-               /*
-                * Succeed to disable ROCE, wait for the IB device to appear
-                * again after reload.
-                */
-               int r;
-               struct ibv_device *ibv_new;
-
-               for (r = MLX5_VDPA_MAX_RETRIES; r; r--) {
-                       ibv_new = mlx5_vdpa_get_ib_device_match(addr);
-                       if (ibv_new) {
-                               *ibv = ibv_new;
-                               return 0;
-                       }
-                       usleep(MLX5_VDPA_USEC);
-               }
-               DRV_LOG(ERR, "Cannot much device %s after ROCE disable, "
-                       "retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES);
-               rte_errno = EAGAIN;
-       }
-       return -rte_errno;
+       if (mlx5_vdpa_nl_roce_disable(pci_addr) != 0 &&
+           mlx5_vdpa_sys_roce_disable(pci_addr) != 0)
+               return -rte_errno;
+       return 0;
 }
 
 static int
@@ -614,7 +571,7 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
        unsigned long tmp;
        int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
 
-       if (strcmp(key, "class") == 0)
+       if (strcmp(key, RTE_DEVARGS_KEY_CLASS) == 0)
                return 0;
        errno = 0;
        tmp = strtoul(val, NULL, 0);
@@ -630,12 +587,18 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
        } else if (strcmp(key, "event_us") == 0) {
                priv->event_us = (uint32_t)tmp;
        } else if (strcmp(key, "no_traffic_time") == 0) {
-               priv->no_traffic_time_s = (uint32_t)tmp;
+               priv->no_traffic_max = (uint32_t)tmp;
        } else if (strcmp(key, "event_core") == 0) {
                if (tmp >= (unsigned long)n_cores)
                        DRV_LOG(WARNING, "Invalid event_core %s.", val);
                else
                        priv->event_core = tmp;
+       } else if (strcmp(key, "hw_latency_mode") == 0) {
+               priv->hw_latency_mode = (uint32_t)tmp;
+       } else if (strcmp(key, "hw_max_latency_us") == 0) {
+               priv->hw_max_latency_us = (uint32_t)tmp;
+       } else if (strcmp(key, "hw_max_pending_comp") == 0) {
+               priv->hw_max_pending_comp = (uint32_t)tmp;
        } else {
                DRV_LOG(WARNING, "Invalid key %s.", key);
        }
@@ -647,10 +610,10 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
 {
        struct rte_kvargs *kvlist;
 
-       priv->event_mode = MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+       priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
        priv->event_us = 0;
        priv->event_core = -1;
-       priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+       priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
        if (devargs == NULL)
                return;
        kvlist = rte_kvargs_parse(devargs->args, NULL);
@@ -663,47 +626,36 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
                priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
        DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
        DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
-       DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+       DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
 }
 
-/**
- * DPDK callback to register a mlx5 PCI device.
- *
- * This function spawns vdpa device out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_vpda_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, 1 to skip this driver, a negative errno value otherwise
- *   and rte_errno is set.
- */
 static int
-mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-                   struct rte_pci_device *pci_dev __rte_unused)
+mlx5_vdpa_dev_probe(struct rte_device *dev)
 {
        struct ibv_device *ibv;
        struct mlx5_vdpa_priv *priv = NULL;
        struct ibv_context *ctx = NULL;
        struct mlx5_hca_attr attr;
+       int retry;
        int ret;
 
-       ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
-       if (!ibv) {
-               DRV_LOG(ERR, "No matching IB device for PCI slot "
-                       PCI_PRI_FMT ".", pci_dev->addr.domain,
-                       pci_dev->addr.bus, pci_dev->addr.devid,
-                       pci_dev->addr.function);
+       if (mlx5_vdpa_roce_disable(dev) != 0) {
+               DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+                       dev->name);
                return -rte_errno;
-       } else {
-               DRV_LOG(INFO, "PCI information matches for device \"%s\".",
-                       ibv->name);
        }
-       if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
-               DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
-                       ibv->name);
+       /* Wait for the IB device to appear again after reload. */
+       for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
+               ibv = mlx5_os_get_ibv_dev(dev);
+               if (ibv != NULL)
+                       break;
+               usleep(MLX5_VDPA_USEC);
+       }
+       if (ibv == NULL) {
+               DRV_LOG(ERR, "Cannot get IB device after disabling RoCE for "
+                               "\"%s\", retries exceed %d.",
+                               dev->name, MLX5_VDPA_MAX_RETRIES);
+               rte_errno = EAGAIN;
                return -rte_errno;
        }
        ctx = mlx5_glue->dv_open_device(ibv);
@@ -737,23 +689,22 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        priv->caps = attr.vdpa;
        priv->log_max_rqt_size = attr.log_max_rqt_size;
        priv->num_lag_ports = attr.num_lag_ports;
+       priv->qp_ts_format = attr.qp_ts_format;
        if (attr.num_lag_ports == 0)
                priv->num_lag_ports = 1;
        priv->ctx = ctx;
-       priv->pci_dev = pci_dev;
        priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
        if (!priv->var) {
-               DRV_LOG(ERR, "Failed to allocate VAR %u.\n", errno);
+               DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
                goto error;
        }
-       priv->vdev = rte_vdpa_register_device(&pci_dev->device,
-                       &mlx5_vdpa_ops);
+       priv->vdev = rte_vdpa_register_device(dev, &mlx5_vdpa_ops);
        if (priv->vdev == NULL) {
                DRV_LOG(ERR, "Failed to register vDPA device.");
                rte_errno = rte_errno ? rte_errno : EINVAL;
                goto error;
        }
-       mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
+       mlx5_vdpa_config_get(dev->devargs, priv);
        SLIST_INIT(&priv->mr_list);
        pthread_mutex_init(&priv->vq_config_lock, NULL);
        pthread_mutex_lock(&priv_list_lock);
@@ -772,26 +723,15 @@ error:
        return -rte_errno;
 }
 
-/**
- * DPDK callback to remove a PCI device.
- *
- * This function removes all vDPA devices belong to a given PCI device.
- *
- * @param[in] pci_dev
- *   Pointer to the PCI device.
- *
- * @return
- *   0 on success, the function cannot fail.
- */
 static int
-mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
+mlx5_vdpa_dev_remove(struct rte_device *dev)
 {
        struct mlx5_vdpa_priv *priv = NULL;
        int found = 0;
 
        pthread_mutex_lock(&priv_list_lock);
        TAILQ_FOREACH(priv, &priv_list, next) {
-               if (!rte_pci_addr_cmp(&priv->pci_dev->addr, &pci_dev->addr)) {
+               if (priv->vdev->device == dev) {
                        found = 1;
                        break;
                }
@@ -806,6 +746,8 @@ mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
                        mlx5_glue->dv_free_var(priv->var);
                        priv->var = NULL;
                }
+               if (priv->vdev)
+                       rte_vdpa_unregister_device(priv->vdev);
                mlx5_glue->close_device(priv->ctx);
                pthread_mutex_destroy(&priv->vq_config_lock);
                rte_free(priv);
@@ -847,20 +789,15 @@ static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
        }
 };
 
-static struct mlx5_pci_driver mlx5_vdpa_driver = {
-       .driver_class = MLX5_CLASS_VDPA,
-       .pci_driver = {
-               .driver = {
-                       .name = "mlx5_vdpa",
-               },
-               .id_table = mlx5_vdpa_pci_id_map,
-               .probe = mlx5_vdpa_pci_probe,
-               .remove = mlx5_vdpa_pci_remove,
-               .drv_flags = 0,
-       },
+static struct mlx5_class_driver mlx5_vdpa_driver = {
+       .drv_class = MLX5_CLASS_VDPA,
+       .name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
+       .id_table = mlx5_vdpa_pci_id_map,
+       .probe = mlx5_vdpa_dev_probe,
+       .remove = mlx5_vdpa_dev_remove,
 };
 
-RTE_LOG_REGISTER(mlx5_vdpa_logtype, pmd.vdpa.mlx5, NOTICE)
+RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)
 
 /**
  * Driver initialization routine.
@@ -869,9 +806,9 @@ RTE_INIT(rte_mlx5_vdpa_init)
 {
        mlx5_common_init();
        if (mlx5_glue)
-               mlx5_pci_driver_register(&mlx5_vdpa_driver);
+               mlx5_class_driver_register(&mlx5_vdpa_driver);
 }
 
-RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__);
-RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib");
+RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");