common/cnxk: support bandwidth profile configure
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
index 9ea032d..6d17d7a 100644 (file)
@@ -11,9 +11,8 @@
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_errno.h>
-#include <rte_bus_pci.h>
-#include <rte_pci.h>
 #include <rte_string_fns.h>
+#include <rte_bus_pci.h>
 
 #include <mlx5_glue.h>
 #include <mlx5_common.h>
@@ -24,6 +23,7 @@
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
+#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
 
 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
                            (1ULL << VIRTIO_F_ANY_LAYOUT) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
-                            (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))
+                            (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+                            (1ULL << VHOST_USER_PROTOCOL_F_STATUS))
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
                                              TAILQ_HEAD_INITIALIZER(priv_list);
 static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
-int mlx5_vdpa_logtype;
 
 static struct mlx5_vdpa_priv *
 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
@@ -129,10 +130,10 @@ mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
 static int
 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
 {
-       struct rte_vdpa_device *vdev = rte_vdpa_get_device(
-                       rte_vhost_get_vdpa_device_id(vid));
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+       int ret;
 
        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
@@ -142,37 +143,16 @@ mlx5_vdpa_set_vring_state(int vid, int vring, int state)
                DRV_LOG(ERR, "Too big vring id: %d.", vring);
                return -E2BIG;
        }
-       return mlx5_vdpa_virtq_enable(priv, vring, state);
-}
-
-static int
-mlx5_vdpa_direct_db_prepare(struct mlx5_vdpa_priv *priv)
-{
-       int ret;
-
-       if (priv->direct_notifier) {
-               ret = rte_vhost_host_notifier_ctrl(priv->vid, false);
-               if (ret != 0) {
-                       DRV_LOG(INFO, "Direct HW notifier FD cannot be "
-                               "destroyed for device %d: %d.", priv->vid, ret);
-                       return -1;
-               }
-               priv->direct_notifier = 0;
-       }
-       ret = rte_vhost_host_notifier_ctrl(priv->vid, true);
-       if (ret != 0)
-               DRV_LOG(INFO, "Direct HW notifier FD cannot be configured for"
-                       " device %d: %d.", priv->vid, ret);
-       else
-               priv->direct_notifier = 1;
-       return 0;
+       pthread_mutex_lock(&priv->vq_config_lock);
+       ret = mlx5_vdpa_virtq_enable(priv, vring, state);
+       pthread_mutex_unlock(&priv->vq_config_lock);
+       return ret;
 }
 
 static int
 mlx5_vdpa_features_set(int vid)
 {
-       struct rte_vdpa_device *vdev = rte_vdpa_get_device(
-                       rte_vhost_get_vdpa_device_id(vid));
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        uint64_t log_base, log_size;
@@ -292,8 +272,7 @@ mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
 static int
 mlx5_vdpa_dev_close(int vid)
 {
-       struct rte_vdpa_device *vdev = rte_vdpa_get_device(
-                       rte_vhost_get_vdpa_device_id(vid));
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        int ret = 0;
@@ -304,6 +283,7 @@ mlx5_vdpa_dev_close(int vid)
        }
        if (priv->configured)
                ret |= mlx5_vdpa_lm_log(priv);
+       mlx5_vdpa_err_event_unset(priv);
        mlx5_vdpa_cqe_event_unset(priv);
        mlx5_vdpa_steer_unset(priv);
        mlx5_vdpa_virtqs_release(priv);
@@ -315,6 +295,8 @@ mlx5_vdpa_dev_close(int vid)
        }
        priv->configured = 0;
        priv->vid = 0;
+       /* The mutex may stay locked after event thread cancel - reinit it. */
+       pthread_mutex_init(&priv->vq_config_lock, NULL);
        DRV_LOG(INFO, "vDPA device %d was closed.", vid);
        return ret;
 }
@@ -322,8 +304,7 @@ mlx5_vdpa_dev_close(int vid)
 static int
 mlx5_vdpa_dev_config(int vid)
 {
-       struct rte_vdpa_device *vdev = rte_vdpa_get_device(
-                       rte_vhost_get_vdpa_device_id(vid));
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 
@@ -340,7 +321,7 @@ mlx5_vdpa_dev_config(int vid)
                DRV_LOG(WARNING, "MTU cannot be set on device %s.",
                                vdev->device->name);
        if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) ||
-           mlx5_vdpa_direct_db_prepare(priv) ||
+           mlx5_vdpa_err_event_setup(priv) ||
            mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
            mlx5_vdpa_cqe_event_setup(priv)) {
                mlx5_vdpa_dev_close(vid);
@@ -354,8 +335,7 @@ mlx5_vdpa_dev_config(int vid)
 static int
 mlx5_vdpa_get_device_fd(int vid)
 {
-       struct rte_vdpa_device *vdev = rte_vdpa_get_device(
-                       rte_vhost_get_vdpa_device_id(vid));
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 
@@ -369,8 +349,7 @@ mlx5_vdpa_get_device_fd(int vid)
 static int
 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
 {
-       struct rte_vdpa_device *vdev = rte_vdpa_get_device(
-                       rte_vhost_get_vdpa_device_id(vid));
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 
@@ -493,34 +472,6 @@ static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
        .reset_stats = mlx5_vdpa_reset_stats,
 };
 
-static struct ibv_device *
-mlx5_vdpa_get_ib_device_match(struct rte_pci_addr *addr)
-{
-       int n;
-       struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
-       struct ibv_device *ibv_match = NULL;
-
-       if (!ibv_list) {
-               rte_errno = ENOSYS;
-               return NULL;
-       }
-       while (n-- > 0) {
-               struct rte_pci_addr pci_addr;
-
-               DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
-               if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &pci_addr))
-                       continue;
-               if (rte_pci_addr_cmp(addr, &pci_addr))
-                       continue;
-               ibv_match = ibv_list[n];
-               break;
-       }
-       if (!ibv_match)
-               rte_errno = ENOENT;
-       mlx5_glue->free_device_list(ibv_list);
-       return ibv_match;
-}
-
 /* Try to disable ROCE by Netlink\Devlink. */
 static int
 mlx5_vdpa_nl_roce_disable(const char *addr)
@@ -600,79 +551,111 @@ close:
 }
 
+/**
+ * Disable RoCE on a device.
+ *
+ * The PCI address string of the device is built first, then RoCE is
+ * disabled through Netlink, with sysfs as the fallback.
+ *
+ * @param[in] dev
+ *   Device to disable RoCE on.
+ *
+ * @return
+ *   0 on success, -rte_errno when the PCI address string cannot be built
+ *   or when both the Netlink and the sysfs attempts fail.
+ */
 static int
-mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
+mlx5_vdpa_roce_disable(struct rte_device *dev)
 {
-       char addr_name[64] = {0};
+       char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
 
-       rte_pci_device_name(addr, addr_name, sizeof(addr_name));
+       if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
+               return -rte_errno;
        /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
-       if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 ||
-           mlx5_vdpa_sys_roce_disable(addr_name) == 0) {
-               /*
-                * Succeed to disable ROCE, wait for the IB device to appear
-                * again after reload.
-                */
-               int r;
-               struct ibv_device *ibv_new;
-
-               for (r = MLX5_VDPA_MAX_RETRIES; r; r--) {
-                       ibv_new = mlx5_vdpa_get_ib_device_match(addr);
-                       if (ibv_new) {
-                               *ibv = ibv_new;
-                               return 0;
-                       }
-                       usleep(MLX5_VDPA_USEC);
-               }
-               DRV_LOG(ERR, "Cannot much device %s after ROCE disable, "
-                       "retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES);
-               rte_errno = EAGAIN;
+       /* The sysfs helper runs only when the Netlink one returned non-zero. */
+       if (mlx5_vdpa_nl_roce_disable(pci_addr) != 0 &&
+           mlx5_vdpa_sys_roce_disable(pci_addr) != 0)
+               return -rte_errno;
+       return 0;
+}
+
+/**
+ * Devargs handler: parse a single key/value pair into the private data.
+ *
+ * Used as the rte_kvargs_process() callback in mlx5_vdpa_config_get().
+ *
+ * @param[in] key
+ *   Option name.
+ * @param[in] val
+ *   Option value, converted with strtoul() using base 0.
+ * @param[in, out] opaque
+ *   Pointer to the struct mlx5_vdpa_priv that receives the parsed value.
+ *
+ * @return
+ *   0 when the pair was stored, skipped or rejected with a warning,
+ *   -errno when strtoul() reports a failure through errno.
+ */
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+       struct mlx5_vdpa_priv *priv = opaque;
+       unsigned long tmp;
+       int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+
+       /* The class key is accepted without being parsed as a number. */
+       if (strcmp(key, RTE_DEVARGS_KEY_CLASS) == 0)
+               return 0;
+       /*
+        * NOTE(review): endptr is NULL, so only failures reported through
+        * errno are caught; a value without digits or with trailing text
+        * may be accepted (as 0 or as its numeric prefix) - confirm that
+        * this is intended.
+        */
+       errno = 0;
+       tmp = strtoul(val, NULL, 0);
+       if (errno) {
+               /* NOTE(review): errno is read again after logging - confirm DRV_LOG keeps it. */
+               DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+               return -errno;
        }
-       return -rte_errno;
+       /* From here on a bad value or unknown key only warns; 0 is returned. */
+       if (strcmp(key, "event_mode") == 0) {
+               if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+                       priv->event_mode = (int)tmp;
+               else
+                       DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+       } else if (strcmp(key, "event_us") == 0) {
+               priv->event_us = (uint32_t)tmp;
+       } else if (strcmp(key, "no_traffic_time") == 0) {
+               priv->no_traffic_max = (uint32_t)tmp;
+       } else if (strcmp(key, "event_core") == 0) {
+               /*
+                * The core index must be below the online CPU count.
+                * NOTE(review): if sysconf() fails, n_cores is -1 and the
+                * cast below makes the bound huge, so no value is rejected
+                * - confirm.
+                */
+               if (tmp >= (unsigned long)n_cores)
+                       DRV_LOG(WARNING, "Invalid event_core %s.", val);
+               else
+                       priv->event_core = tmp;
+       } else if (strcmp(key, "hw_latency_mode") == 0) {
+               priv->hw_latency_mode = (uint32_t)tmp;
+       } else if (strcmp(key, "hw_max_latency_us") == 0) {
+               priv->hw_max_latency_us = (uint32_t)tmp;
+       } else if (strcmp(key, "hw_max_pending_comp") == 0) {
+               priv->hw_max_pending_comp = (uint32_t)tmp;
+       } else {
+               DRV_LOG(WARNING, "Invalid key %s.", key);
+       }
+       return 0;
+}
+
+/**
+ * Fill the event configuration of a device from defaults and devargs.
+ *
+ * Defaults are written first; user options, when present, are then parsed
+ * by mlx5_vdpa_args_check_handler() and override them.
+ *
+ * @param[in] devargs
+ *   Device arguments, may be NULL.
+ * @param[out] priv
+ *   Private structure receiving the configuration.
+ */
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+       struct rte_kvargs *kvlist;
+
+       /*
+        * Only these four fields get a default here.
+        * NOTE(review): the hw_* fields set by the handler are presumably
+        * zeroed when priv is allocated - confirm.
+        */
+       priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
+       priv->event_us = 0;
+       priv->event_core = -1;
+       priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
+       /* Without devargs, or if parsing fails, the defaults above are kept. */
+       if (devargs == NULL)
+               return;
+       kvlist = rte_kvargs_parse(devargs->args, NULL);
+       if (kvlist == NULL)
+               return;
+       /* The processing result is not checked; the handler logs bad input. */
+       rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+       rte_kvargs_free(kvlist);
+       /* A zero event_us in dynamic timer mode is replaced by the default step. */
+       if (!priv->event_us &&
+           priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+               priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+       DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+       DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+       DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
 }
 
-/**
- * DPDK callback to register a PCI device.
- *
- * This function spawns vdpa device out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_vpda_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, 1 to skip this driver, a negative errno value otherwise
- *   and rte_errno is set.
- */
 static int
-mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-                   struct rte_pci_device *pci_dev __rte_unused)
+mlx5_vdpa_dev_probe(struct rte_device *dev)
 {
        struct ibv_device *ibv;
        struct mlx5_vdpa_priv *priv = NULL;
        struct ibv_context *ctx = NULL;
        struct mlx5_hca_attr attr;
+       int retry;
        int ret;
 
-       if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_VDPA) {
-               DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
-                       " driver.");
-               return 1;
-       }
-       ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
-       if (!ibv) {
-               DRV_LOG(ERR, "No matching IB device for PCI slot "
-                       PCI_PRI_FMT ".", pci_dev->addr.domain,
-                       pci_dev->addr.bus, pci_dev->addr.devid,
-                       pci_dev->addr.function);
+       if (mlx5_vdpa_roce_disable(dev) != 0) {
+               DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+                       dev->name);
                return -rte_errno;
-       } else {
-               DRV_LOG(INFO, "PCI information matches for device \"%s\".",
-                       ibv->name);
        }
-       if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
-               DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
-                       ibv->name);
+       /* Wait for the IB device to appear again after reload. */
+       for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
+               ibv = mlx5_os_get_ibv_dev(dev);
+               if (ibv != NULL)
+                       break;
+               usleep(MLX5_VDPA_USEC);
+       }
+       if (ibv == NULL) {
+               DRV_LOG(ERR, "Cannot get IB device after disabling RoCE for "
+                               "\"%s\", retries exceed %d.",
+                               dev->name, MLX5_VDPA_MAX_RETRIES);
+               rte_errno = EAGAIN;
                return -rte_errno;
        }
        ctx = mlx5_glue->dv_open_device(ibv);
@@ -705,21 +688,25 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        }
        priv->caps = attr.vdpa;
        priv->log_max_rqt_size = attr.log_max_rqt_size;
+       priv->num_lag_ports = attr.num_lag_ports;
+       priv->qp_ts_format = attr.qp_ts_format;
+       if (attr.num_lag_ports == 0)
+               priv->num_lag_ports = 1;
        priv->ctx = ctx;
-       priv->pci_dev = pci_dev;
        priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
        if (!priv->var) {
-               DRV_LOG(ERR, "Failed to allocate VAR %u.\n", errno);
+               DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
                goto error;
        }
-       priv->vdev = rte_vdpa_register_device(&pci_dev->device,
-                       &mlx5_vdpa_ops);
+       priv->vdev = rte_vdpa_register_device(dev, &mlx5_vdpa_ops);
        if (priv->vdev == NULL) {
                DRV_LOG(ERR, "Failed to register vDPA device.");
                rte_errno = rte_errno ? rte_errno : EINVAL;
                goto error;
        }
+       mlx5_vdpa_config_get(dev->devargs, priv);
        SLIST_INIT(&priv->mr_list);
+       pthread_mutex_init(&priv->vq_config_lock, NULL);
        pthread_mutex_lock(&priv_list_lock);
        TAILQ_INSERT_TAIL(&priv_list, priv, next);
        pthread_mutex_unlock(&priv_list_lock);
@@ -736,26 +723,15 @@ error:
        return -rte_errno;
 }
 
-/**
- * DPDK callback to remove a PCI device.
- *
- * This function removes all vDPA devices belong to a given PCI device.
- *
- * @param[in] pci_dev
- *   Pointer to the PCI device.
- *
- * @return
- *   0 on success, the function cannot fail.
- */
 static int
-mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
+mlx5_vdpa_dev_remove(struct rte_device *dev)
 {
        struct mlx5_vdpa_priv *priv = NULL;
        int found = 0;
 
        pthread_mutex_lock(&priv_list_lock);
        TAILQ_FOREACH(priv, &priv_list, next) {
-               if (!rte_pci_addr_cmp(&priv->pci_dev->addr, &pci_dev->addr)) {
+               if (priv->vdev->device == dev) {
                        found = 1;
                        break;
                }
@@ -770,7 +746,10 @@ mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
                        mlx5_glue->dv_free_var(priv->var);
                        priv->var = NULL;
                }
+               if (priv->vdev)
+                       rte_vdpa_unregister_device(priv->vdev);
                mlx5_glue->close_device(priv->ctx);
+               pthread_mutex_destroy(&priv->vq_config_lock);
                rte_free(priv);
        }
        return 0;
@@ -791,40 +770,45 @@ static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-                               PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF)
+                               PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
        },
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                               PCI_DEVICE_ID_MELLANOX_CONNECTX7)
+       },
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                               PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
+       },
        {
                .vendor_id = 0
        }
 };
 
-static struct rte_pci_driver mlx5_vdpa_driver = {
-       .driver = {
-               .name = "mlx5_vdpa",
-       },
+/* vDPA class driver descriptor, registered in rte_mlx5_vdpa_init(). */
+static struct mlx5_class_driver mlx5_vdpa_driver = {
+       .drv_class = MLX5_CLASS_VDPA,
+       .name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
        .id_table = mlx5_vdpa_pci_id_map,
-       .probe = mlx5_vdpa_pci_probe,
-       .remove = mlx5_vdpa_pci_remove,
-       .drv_flags = 0,
+       .probe = mlx5_vdpa_dev_probe,
+       .remove = mlx5_vdpa_dev_remove,
 };
 
+RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)
+
 /**
  * Driver initialization routine.
  */
 RTE_INIT(rte_mlx5_vdpa_init)
 {
-       /* Initialize common log type. */
-       mlx5_vdpa_logtype = rte_log_register("pmd.vdpa.mlx5");
-       if (mlx5_vdpa_logtype >= 0)
-               rte_log_set_level(mlx5_vdpa_logtype, RTE_LOG_NOTICE);
+       /* NOTE(review): presumably sets up mlx5_glue, tested below - confirm. */
+       mlx5_common_init();
+       /* The class driver is registered only when mlx5_glue is non-NULL. */
        if (mlx5_glue)
-               rte_pci_register(&mlx5_vdpa_driver);
+               mlx5_class_driver_register(&mlx5_vdpa_driver);
 }
 
-RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__);
-RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib");
+RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");