raw/ioat: add bus driver for device scanning automatically
author Bruce Richardson <bruce.richardson@intel.com>
Tue, 4 May 2021 13:14:54 +0000 (14:14 +0100)
committer Thomas Monjalon <thomas@monjalon.net>
Tue, 4 May 2021 15:29:06 +0000 (17:29 +0200)
Rather than using a vdev with args, DPDK can scan and initialize the
devices automatically using a bus-type driver. This bus does not need to
worry about registering device drivers, rather it can initialize the
devices directly on probe.

The device instances (queues) to use are detected from /dev, with
additional information about them obtained from /sys.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
doc/guides/rawdevs/ioat.rst
drivers/raw/ioat/idxd_bus.c [new file with mode: 0644]
drivers/raw/ioat/idxd_vdev.c [deleted file]
drivers/raw/ioat/meson.build

index 60438cc..2ad13db 100644 (file)
@@ -78,7 +78,7 @@ Example configuration for a work queue::
 
         $ accel-config config-wq dsa0/wq0.0 --group-id=0 \
            --mode=dedicated --priority=10 --wq-size=8 \
-           --type=user --name=app1
+           --type=user --name=dpdk_app1
 
 Once the devices have been configured, they need to be enabled::
 
@@ -114,15 +114,18 @@ the device driver on the EAL commandline, via the ``allowlist`` or ``-a`` flag e
 
        $ dpdk-test -a <b:d:f>,max_queues=4
 
-If the device is bound to the IDXD kernel driver (and previously configured with sysfs),
-then a specific work queue needs to be passed to the application via a vdev parameter.
-This vdev parameter take the driver name and work queue name as parameters.
-For example, to use work queue 0 on Intel\ |reg| DSA instance 0::
-
-        $ dpdk-test --no-pci --vdev=rawdev_idxd,wq=0.0
-
-Once probed successfully, the device will appear as a ``rawdev``, that is a
-"raw device type" inside DPDK, and can be accessed using APIs from the
+For devices bound to the IDXD kernel driver,
+the DPDK ioat driver will automatically perform a scan for available workqueues to use.
+Any workqueues found listed in ``/dev/dsa`` on the system will be checked in ``/sys``,
+and any which have ``dpdk_`` prefix in their name will be automatically probed by the
+driver to make them available to the application.
+Alternatively, to support use by multiple DPDK processes simultaneously,
+the value used as the DPDK ``--file-prefix`` parameter may be used as a workqueue name prefix,
+instead of ``dpdk_``,
+allowing each DPDK application instance to only use a subset of configured queues.
+
+Once probed successfully, irrespective of kernel driver, the device will appear as a ``rawdev``,
+that is a "raw device type" inside DPDK, and can be accessed using APIs from the
 ``rte_rawdev`` library.
 
 Using IOAT Rawdev Devices
diff --git a/drivers/raw/ioat/idxd_bus.c b/drivers/raw/ioat/idxd_bus.c
new file mode 100644 (file)
index 0000000..5b448d4
--- /dev/null
@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <dirent.h>
+#include <libgen.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <rte_bus.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include "ioat_private.h"
+
+/* default value for DSA paths, but allow override in environment for testing */
+#define DSA_DEV_PATH "/dev/dsa"
+#define DSA_SYSFS_PATH "/sys/bus/dsa/devices"
+
+/**
+ * Unique identifier for a DSA device/WQ instance.
+ *
+ * Filled in by dsa_addr_parse() from a work queue name of the form
+ * "wq<device_id>.<wq_id>".
+ */
+struct dsa_wq_addr {
+       uint16_t device_id; /**< first number in the WQ name; also selects the "dsa<N>" sysfs directory */
+       uint16_t wq_id;     /**< second number in the WQ name */
+};
+
+/**
+ * A DSA device instance: one entry per work queue found by dsa_scan().
+ *
+ * The embedded rte_device must stay the first member: dsa_find_device()
+ * casts an rte_device pointer back to this type and enforces the layout
+ * with a build-time check.
+ */
+struct rte_dsa_device {
+       struct rte_device device;           /**< Inherit core device */
+       TAILQ_ENTRY(rte_dsa_device) next;   /**< next dev in list */
+
+       char wq_name[32];                   /**< the workqueue name/number e.g. wq0.1 */
+       struct dsa_wq_addr addr;            /**< Identifies the specific WQ */
+};
+
+/*
+ * Forward prototypes: the bus callbacks are referenced by the dsa_bus
+ * initializer before their definitions appear later in this file.
+ */
+struct dsa_bus;
+static int dsa_scan(void);
+static int dsa_probe(void);
+static struct rte_device *dsa_find_device(const struct rte_device *start,
+               rte_dev_cmp_t cmp,  const void *data);
+static enum rte_iova_mode dsa_get_iommu_class(void);
+static int dsa_addr_parse(const char *name, void *addr);
+
+/** List type holding every rte_dsa_device known to the bus */
+TAILQ_HEAD(dsa_device_list, rte_dsa_device);
+
+/**
+ * Structure describing the DSA bus
+ */
+struct dsa_bus {
+       struct rte_bus bus;               /**< Inherit the generic class */
+       struct rte_driver driver;         /**< Driver struct for devices to point to */
+       struct dsa_device_list device_list;  /**< List of DSA work queue devices added by dsa_scan() */
+};
+
+/*
+ * The single DSA bus instance. It carries the bus callbacks, the one driver
+ * structure that probed devices point at, and the (initially empty) list of
+ * work queue devices populated by dsa_scan().
+ */
+struct dsa_bus dsa_bus = {
+       .bus = {
+               .scan = dsa_scan,
+               .probe = dsa_probe,
+               .find_device = dsa_find_device,
+               .get_iommu_class = dsa_get_iommu_class,
+               .parse = dsa_addr_parse,
+       },
+       .driver = {
+               .name = "rawdev_idxd"
+       },
+       .device_list = TAILQ_HEAD_INITIALIZER(dsa_bus.device_list),
+};
+
+/**
+ * Directory holding the WQ device nodes.
+ *
+ * @return
+ *   The value of the DSA_DEV_PATH environment variable when it is set,
+ *   otherwise the compiled-in DSA_DEV_PATH default.
+ */
+static inline const char *
+dsa_get_dev_path(void)
+{
+       const char *env_override = getenv("DSA_DEV_PATH");
+
+       if (env_override != NULL)
+               return env_override;
+       return DSA_DEV_PATH;
+}
+
+/**
+ * Directory holding the per-device and per-WQ sysfs attributes.
+ *
+ * @return
+ *   The value of the DSA_SYSFS_PATH environment variable when it is set,
+ *   otherwise the compiled-in DSA_SYSFS_PATH default.
+ */
+static inline const char *
+dsa_get_sysfs_path(void)
+{
+       const char *env_override = getenv("DSA_SYSFS_PATH");
+
+       if (env_override != NULL)
+               return env_override;
+       return DSA_SYSFS_PATH;
+}
+
+/*
+ * rawdev operations table handed to idxd_rawdev_create() for every work
+ * queue probed through this bus.
+ */
+static const struct rte_rawdev_ops idxd_vdev_ops = {
+               .dev_close = idxd_rawdev_close,
+               .dev_selftest = ioat_rawdev_test,
+               .dump = idxd_dev_dump,
+               .dev_configure = idxd_dev_configure,
+               .dev_info_get = idxd_dev_info_get,
+               .xstats_get = ioat_xstats_get,
+               .xstats_get_names = ioat_xstats_get_names,
+               .xstats_reset = ioat_xstats_reset,
+};
+
+/**
+ * Map the portal of a work queue into this process.
+ *
+ * Opens "<dev path>/<wq_name>" read-write and maps 0x1000 bytes of it,
+ * write-only and shared.
+ *
+ * @param dev
+ *   Device whose wq_name selects the device node to open.
+ * @return
+ *   The mapped address, or NULL if the node could not be opened or mapped.
+ */
+static void *
+idxd_vdev_mmap_wq(struct rte_dsa_device *dev)
+{
+       char path[PATH_MAX];
+       void *portal;
+       int wq_fd;
+
+       snprintf(path, sizeof(path), "%s/%s", dsa_get_dev_path(), dev->wq_name);
+
+       wq_fd = open(path, O_RDWR);
+       if (wq_fd < 0) {
+               IOAT_PMD_ERR("Failed to open device path: %s", path);
+               return NULL;
+       }
+
+       portal = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, wq_fd, 0);
+       /* the descriptor is not needed once the mapping attempt is done */
+       close(wq_fd);
+
+       if (portal != MAP_FAILED)
+               return portal;
+
+       IOAT_PMD_ERR("Failed to mmap device %s", path);
+       return NULL;
+}
+
+/**
+ * Read a sysfs attribute of a work queue as a NUL-terminated string.
+ *
+ * The file "<sysfs path>/<wq_name>/<filename>" is read with a single read()
+ * call. The content is returned exactly as read, so any trailing newline
+ * in the file is kept.
+ *
+ * @param dev
+ *   Device whose wq_name selects the sysfs directory.
+ * @param filename
+ *   Attribute file name inside that directory.
+ * @param value
+ *   Output buffer; always NUL-terminated on success.
+ * @param valuelen
+ *   Size of @p value in bytes; must be at least 1.
+ * @return
+ *   0 on success, -1 on error (zero-sized buffer, open failure or read failure).
+ */
+static int
+read_wq_string(struct rte_dsa_device *dev, const char *filename,
+               char *value, size_t valuelen)
+{
+       char sysfs_node[PATH_MAX];
+       ssize_t len;
+       int saved_errno;
+       int fd;
+
+       /* "valuelen - 1" below would wrap around for an empty buffer */
+       if (valuelen == 0)
+               return -1;
+
+       snprintf(sysfs_node, sizeof(sysfs_node), "%s/%s/%s",
+                       dsa_get_sysfs_path(), dev->wq_name, filename);
+       fd = open(sysfs_node, O_RDONLY);
+       if (fd < 0) {
+               IOAT_PMD_ERR("%s(): opening file '%s' failed: %s",
+                               __func__, sysfs_node, strerror(errno));
+               return -1;
+       }
+
+       len = read(fd, value, valuelen - 1);
+       /* close() may overwrite errno, so capture the read() error first */
+       saved_errno = errno;
+       close(fd);
+       if (len < 0) {
+               IOAT_PMD_ERR("%s(): error reading file '%s': %s",
+                               __func__, sysfs_node, strerror(saved_errno));
+               return -1;
+       }
+       value[len] = '\0';
+       return 0;
+}
+
+/**
+ * Read a sysfs attribute of a work queue as a decimal integer.
+ *
+ * The value is parsed from "<sysfs path>/<wq_name>/<filename>".
+ *
+ * @param dev
+ *   Device whose wq_name selects the sysfs directory.
+ * @param filename
+ *   Attribute file name inside that directory.
+ * @param value
+ *   Where the parsed integer is stored.
+ * @return
+ *   0 on success, -1 if the file cannot be opened or holds no integer.
+ */
+static int
+read_wq_int(struct rte_dsa_device *dev, const char *filename,
+               int *value)
+{
+       char attr_path[PATH_MAX];
+       FILE *attr;
+       int status;
+
+       snprintf(attr_path, sizeof(attr_path), "%s/%s/%s",
+                       dsa_get_sysfs_path(), dev->wq_name, filename);
+
+       attr = fopen(attr_path, "r");
+       if (attr == NULL) {
+               IOAT_PMD_ERR("%s(): opening file '%s' failed: %s",
+                               __func__, attr_path, strerror(errno));
+               return -1;
+       }
+
+       status = (fscanf(attr, "%d", value) == 1) ? 0 : -1;
+       if (status != 0) {
+               IOAT_PMD_ERR("%s(): error reading file '%s': %s",
+                               __func__, attr_path, strerror(errno));
+       }
+
+       fclose(attr);
+       return status;
+}
+
+/**
+ * Read a sysfs attribute of the parent DSA device as a decimal integer.
+ *
+ * The value is parsed from "<sysfs path>/dsa<device_id>/<filename>", where
+ * device_id comes from the work queue address.
+ *
+ * @param dev
+ *   Device whose addr.device_id selects the "dsa<N>" directory.
+ * @param filename
+ *   Attribute file name inside that directory.
+ * @param value
+ *   Where the parsed integer is stored.
+ * @return
+ *   0 on success, -1 if the file cannot be opened or holds no integer.
+ */
+static int
+read_device_int(struct rte_dsa_device *dev, const char *filename,
+               int *value)
+{
+       char attr_path[PATH_MAX];
+       FILE *attr;
+       int status;
+
+       snprintf(attr_path, sizeof(attr_path), "%s/dsa%d/%s",
+                       dsa_get_sysfs_path(), dev->addr.device_id, filename);
+
+       attr = fopen(attr_path, "r");
+       if (attr == NULL) {
+               IOAT_PMD_ERR("%s(): opening file '%s' failed: %s",
+                               __func__, attr_path, strerror(errno));
+               return -1;
+       }
+
+       status = (fscanf(attr, "%d", value) == 1) ? 0 : -1;
+       if (status != 0) {
+               IOAT_PMD_ERR("%s(): error reading file '%s': %s",
+                               __func__, attr_path, strerror(errno));
+       }
+
+       fclose(attr);
+       return status;
+}
+
+/**
+ * Create a rawdev for one work queue.
+ *
+ * Reads the WQ "size" attribute from sysfs (used as max_batches), maps the
+ * WQ portal and passes the resulting template to idxd_rawdev_create().
+ *
+ * @param dev
+ *   The work queue device to probe.
+ * @return
+ *   0 on success; -1 if the size attribute cannot be read; -ENOENT if the
+ *   portal cannot be mapped; otherwise the error returned by
+ *   idxd_rawdev_create().
+ */
+static int
+idxd_rawdev_probe_dsa(struct rte_dsa_device *dev)
+{
+       struct idxd_rawdev idxd = {{0}}; /* double {} to avoid error on BSD12 */
+       int wq_size = 0;
+       int ret;
+
+       IOAT_PMD_INFO("Probing device %s on numa node %d",
+                       dev->wq_name, dev->device.numa_node);
+       if (read_wq_int(dev, "size", &wq_size) < 0)
+               return -1;
+       idxd.max_batches = wq_size;
+       idxd.qid = dev->addr.wq_id;
+       idxd.u.vdev.dsa_id = dev->addr.device_id;
+
+       idxd.public.portal = idxd_vdev_mmap_wq(dev);
+       if (idxd.public.portal == NULL) {
+               IOAT_PMD_ERR("WQ mmap failed");
+               return -ENOENT;
+       }
+
+       ret = idxd_rawdev_create(dev->wq_name, &dev->device, &idxd, &idxd_vdev_ops);
+       if (ret) {
+               IOAT_PMD_ERR("Failed to create rawdev %s", dev->wq_name);
+               /*
+                * No rawdev was created, so this function still owns the
+                * mapping; release it rather than leaking it. The length
+                * must match the one mapped in idxd_vdev_mmap_wq().
+                */
+               munmap(idxd.public.portal, 0x1000);
+               return ret;
+       }
+
+       return 0;
+}
+
+/**
+ * Decide whether a work queue is meant for this DPDK process.
+ *
+ * A queue is accepted when its configured name starts with "dpdk_", or with
+ * the last path component of the EAL runtime directory followed by '_'
+ * (per the driver documentation, that component is the --file-prefix value).
+ *
+ * @param name
+ *   The work queue name as read from sysfs.
+ * @return
+ *   1 if the queue should be used by this process, 0 otherwise (including
+ *   when the runtime directory is unavailable or cannot be duplicated).
+ */
+static int
+is_for_this_process_use(const char *name)
+{
+       const char *eal_dir;
+       char *runtime_dir;
+       const char *prefix;
+       size_t prefixlen;
+       int retval;
+
+       /* the generic prefix needs no allocation, so test it first */
+       if (strncmp(name, "dpdk_", 5) == 0)
+               return 1;
+
+       eal_dir = rte_eal_get_runtime_dir();
+       if (eal_dir == NULL)
+               return 0;
+
+       /* basename() may modify its argument, so work on a private copy */
+       runtime_dir = strdup(eal_dir);
+       if (runtime_dir == NULL)
+               return 0;
+
+       prefix = basename(runtime_dir);
+       prefixlen = strlen(prefix);
+       retval = strncmp(name, prefix, prefixlen) == 0 && name[prefixlen] == '_';
+
+       free(runtime_dir);
+       return retval;
+}
+
+/**
+ * Bus probe callback: create rawdevs for the scanned work queues that are
+ * allocated to this process.
+ *
+ * For each device on the bus list the WQ "type" and "name" sysfs attributes
+ * are read. Queues of type "user" whose name passes
+ * is_for_this_process_use() are probed; all others are skipped. A failure
+ * on one queue does not stop the others from being probed.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+dsa_probe(void)
+{
+       struct rte_dsa_device *dev;
+
+       TAILQ_FOREACH(dev, &dsa_bus.device_list, next) {
+               char type[64], name[64];
+
+               if (read_wq_string(dev, "type", type, sizeof(type)) < 0 ||
+                               read_wq_string(dev, "name", name, sizeof(name)) < 0)
+                       continue;
+
+               /*
+                * Prefix compare only: read_wq_string() returns the file
+                * content as-is, so it may carry a trailing newline.
+                */
+               if (strncmp(type, "user", 4) != 0 || !is_for_this_process_use(name)) {
+                       IOAT_PMD_DEBUG("WQ '%s', not allocated to DPDK", dev->wq_name);
+                       continue;
+               }
+
+               /* the driver is set before probing, then dropped on failure
+                * so an unprobed device is not left looking bound
+                */
+               dev->device.driver = &dsa_bus.driver;
+               if (idxd_rawdev_probe_dsa(dev) != 0)
+                       dev->device.driver = NULL;
+       }
+
+       return 0;
+}
+
+/**
+ * Bus scan callback: build the device list from the WQ device nodes.
+ *
+ * Every directory entry under the DSA device path whose name starts with
+ * "wq" and parses as "wq<dev>.<wq>" is added to the bus device list. The
+ * NUMA node is read from the parent device's sysfs directory on a
+ * best-effort basis and stays at -1 when it cannot be read.
+ *
+ * @return
+ *   0 on success, including when the device directory does not exist;
+ *   -1 if the directory cannot be opened for another reason;
+ *   -ENOMEM if a device structure cannot be allocated.
+ */
+static int
+dsa_scan(void)
+{
+       const char *path = dsa_get_dev_path();
+       struct dirent *wq;
+       DIR *dev_dir;
+
+       dev_dir = opendir(path);
+       if (dev_dir == NULL) {
+               if (errno == ENOENT)
+                       return 0; /* no bus, return without error */
+               IOAT_PMD_ERR("%s(): opendir '%s' failed: %s",
+                               __func__, path, strerror(errno));
+               return -1;
+       }
+
+       while ((wq = readdir(dev_dir)) != NULL) {
+               struct rte_dsa_device *dev;
+               int numa_node = -1;
+
+               if (strncmp(wq->d_name, "wq", 2) != 0)
+                       continue;
+               if (strnlen(wq->d_name, sizeof(dev->wq_name)) == sizeof(dev->wq_name)) {
+                       IOAT_PMD_ERR("%s(): wq name too long: '%s', skipping",
+                                       __func__, wq->d_name);
+                       continue;
+               }
+               IOAT_PMD_DEBUG("%s(): found %s/%s", __func__, path, wq->d_name);
+
+               /* zeroed so that unset rte_device fields are NULL, not garbage */
+               dev = calloc(1, sizeof(*dev));
+               if (dev == NULL) {
+                       IOAT_PMD_ERR("%s(): failed to allocate device for '%s'",
+                                       __func__, wq->d_name);
+                       closedir(dev_dir);
+                       return -ENOMEM;
+               }
+               if (dsa_addr_parse(wq->d_name, &dev->addr) < 0) {
+                       IOAT_PMD_ERR("Error parsing WQ name: %s", wq->d_name);
+                       free(dev);
+                       continue;
+               }
+               dev->device.bus = &dsa_bus.bus;
+               strlcpy(dev->wq_name, wq->d_name, sizeof(dev->wq_name));
+               /* give the generic device a valid name for name-based lookups */
+               dev->device.name = dev->wq_name;
+               TAILQ_INSERT_TAIL(&dsa_bus.device_list, dev, next);
+
+               /* best effort: numa_node keeps its -1 default on failure */
+               read_device_int(dev, "numa_node", &numa_node);
+               dev->device.numa_node = numa_node;
+       }
+
+       closedir(dev_dir);
+       return 0;
+}
+
+/**
+ * Bus find_device callback: locate the first device matching a comparator.
+ *
+ * @param start
+ *   NULL to search from the head of the list; otherwise the search begins
+ *   with the entry after this device.
+ * @param cmp
+ *   Comparison function; a return value of 0 means "match".
+ * @param data
+ *   Opaque argument passed through to @p cmp.
+ * @return
+ *   The matching device, or NULL if none matches.
+ */
+static struct rte_device *
+dsa_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+                        const void *data)
+{
+       struct rte_dsa_device *cur;
+
+       /* the cast of 'start' below relies on rte_device being the first member */
+       RTE_BUILD_BUG_ON(offsetof(struct rte_dsa_device, device) != 0);
+
+       if (start == NULL)
+               cur = TAILQ_FIRST(&dsa_bus.device_list);
+       else
+               cur = TAILQ_NEXT((const struct rte_dsa_device *)start, next);
+
+       for (; cur != NULL; cur = TAILQ_NEXT(cur, next)) {
+               if (cmp(&cur->device, data) == 0)
+                       return &cur->device;
+       }
+
+       return NULL;
+}
+
+/**
+ * Bus get_iommu_class callback.
+ *
+ * @return
+ *   Always RTE_IOVA_VA.
+ */
+static enum rte_iova_mode
+dsa_get_iommu_class(void)
+{
+       const enum rte_iova_mode required_mode = RTE_IOVA_VA;
+
+       return required_mode;
+}
+
+/**
+ * Bus parse callback: parse a work queue name of the form "wq<dev>.<wq>".
+ *
+ * @param name
+ *   The name to parse.
+ * @param addr
+ *   Optional output, interpreted as a struct dsa_wq_addr. May be NULL when
+ *   the caller only wants to know whether the name belongs to this bus.
+ * @return
+ *   0 if the name is a valid WQ name, -1 if it does not match the format
+ *   or an id does not fit the 16-bit address fields.
+ */
+static int
+dsa_addr_parse(const char *name, void *addr)
+{
+       struct dsa_wq_addr *wq = addr;
+       unsigned int device_id, wq_id;
+
+       if (sscanf(name, "wq%u.%u", &device_id, &wq_id) != 2) {
+               IOAT_PMD_DEBUG("Parsing WQ name failed: %s", name);
+               return -1;
+       }
+
+       /* reject ids that would be silently truncated by the uint16_t fields */
+       if (device_id > UINT16_MAX || wq_id > UINT16_MAX) {
+               IOAT_PMD_DEBUG("Parsing WQ name failed: %s", name);
+               return -1;
+       }
+
+       /* the output is optional: only store the result when requested */
+       if (wq != NULL) {
+               wq->device_id = device_id;
+               wq->wq_id = wq_id;
+       }
+       return 0;
+}
+
+RTE_REGISTER_BUS(dsa, dsa_bus.bus);
diff --git a/drivers/raw/ioat/idxd_vdev.c b/drivers/raw/ioat/idxd_vdev.c
deleted file mode 100644 (file)
index 30a53b3..0000000
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2020 Intel Corporation
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/mman.h>
-
-#include <rte_memzone.h>
-#include <rte_bus_vdev.h>
-#include <rte_kvargs.h>
-#include <rte_string_fns.h>
-#include <rte_rawdev_pmd.h>
-
-#include "ioat_private.h"
-
-/** Name of the device driver */
-#define IDXD_PMD_RAWDEV_NAME rawdev_idxd
-/* takes a work queue(WQ) as parameter */
-#define IDXD_ARG_WQ            "wq"
-
-static const char * const valid_args[] = {
-       IDXD_ARG_WQ,
-       NULL
-};
-
-struct idxd_vdev_args {
-       uint8_t device_id;
-       uint8_t wq_id;
-};
-
-static const struct rte_rawdev_ops idxd_vdev_ops = {
-               .dev_close = idxd_rawdev_close,
-               .dev_selftest = ioat_rawdev_test,
-               .dump = idxd_dev_dump,
-               .dev_configure = idxd_dev_configure,
-               .dev_info_get = idxd_dev_info_get,
-               .xstats_get = ioat_xstats_get,
-               .xstats_get_names = ioat_xstats_get_names,
-               .xstats_reset = ioat_xstats_reset,
-};
-
-static void *
-idxd_vdev_mmap_wq(struct idxd_vdev_args *args)
-{
-       void *addr;
-       char path[PATH_MAX];
-       int fd;
-
-       snprintf(path, sizeof(path), "/dev/dsa/wq%u.%u",
-                       args->device_id, args->wq_id);
-       fd = open(path, O_RDWR);
-       if (fd < 0) {
-               IOAT_PMD_ERR("Failed to open device path");
-               return NULL;
-       }
-
-       addr = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, fd, 0);
-       close(fd);
-       if (addr == MAP_FAILED) {
-               IOAT_PMD_ERR("Failed to mmap device");
-               return NULL;
-       }
-
-       return addr;
-}
-
-static int
-idxd_rawdev_parse_wq(const char *key __rte_unused, const char *value,
-                         void *extra_args)
-{
-       struct idxd_vdev_args *args = (struct idxd_vdev_args *)extra_args;
-       int dev, wq, bytes = -1;
-       int read = sscanf(value, "%d.%d%n", &dev, &wq, &bytes);
-
-       if (read != 2 || bytes != (int)strlen(value)) {
-               IOAT_PMD_ERR("Error parsing work-queue id. Must be in <dev_id>.<queue_id> format");
-               return -EINVAL;
-       }
-
-       if (dev >= UINT8_MAX || wq >= UINT8_MAX) {
-               IOAT_PMD_ERR("Device or work queue id out of range");
-               return -EINVAL;
-       }
-
-       args->device_id = dev;
-       args->wq_id = wq;
-
-       return 0;
-}
-
-static int
-idxd_vdev_parse_params(struct rte_kvargs *kvlist, struct idxd_vdev_args *args)
-{
-       int ret = 0;
-
-       if (rte_kvargs_count(kvlist, IDXD_ARG_WQ) == 1) {
-               if (rte_kvargs_process(kvlist, IDXD_ARG_WQ,
-                               &idxd_rawdev_parse_wq, args) < 0) {
-                       IOAT_PMD_ERR("Error parsing %s", IDXD_ARG_WQ);
-                       ret = -EINVAL;
-               }
-       } else {
-               IOAT_PMD_ERR("%s is a mandatory arg", IDXD_ARG_WQ);
-               ret = -EINVAL;
-       }
-
-       rte_kvargs_free(kvlist);
-       return ret;
-}
-
-static int
-idxd_vdev_get_max_batches(struct idxd_vdev_args *args)
-{
-       char sysfs_path[PATH_MAX];
-       FILE *f;
-       int ret;
-
-       snprintf(sysfs_path, sizeof(sysfs_path),
-                       "/sys/bus/dsa/devices/wq%u.%u/size",
-                       args->device_id, args->wq_id);
-       f = fopen(sysfs_path, "r");
-       if (f == NULL)
-               return -1;
-
-       if (fscanf(f, "%d", &ret) != 1)
-               ret = -1;
-
-       fclose(f);
-       return ret;
-}
-
-static int
-idxd_rawdev_probe_vdev(struct rte_vdev_device *vdev)
-{
-       struct rte_kvargs *kvlist;
-       struct idxd_rawdev idxd = {{0}}; /* double {} to avoid error on BSD12 */
-       struct idxd_vdev_args vdev_args;
-       const char *name;
-       int ret = 0;
-
-       name = rte_vdev_device_name(vdev);
-       if (name == NULL)
-               return -EINVAL;
-
-       IOAT_PMD_INFO("Initializing pmd_idxd for %s", name);
-
-       kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_args);
-       if (kvlist == NULL) {
-               IOAT_PMD_ERR("Invalid kvargs key");
-               return -EINVAL;
-       }
-
-       ret = idxd_vdev_parse_params(kvlist, &vdev_args);
-       if (ret) {
-               IOAT_PMD_ERR("Failed to parse kvargs");
-               return -EINVAL;
-       }
-
-       idxd.qid = vdev_args.wq_id;
-       idxd.u.vdev.dsa_id = vdev_args.device_id;
-       idxd.max_batches = idxd_vdev_get_max_batches(&vdev_args);
-
-       idxd.public.portal = idxd_vdev_mmap_wq(&vdev_args);
-       if (idxd.public.portal == NULL) {
-               IOAT_PMD_ERR("WQ mmap failed");
-               return -ENOENT;
-       }
-
-       ret = idxd_rawdev_create(name, &vdev->device, &idxd, &idxd_vdev_ops);
-       if (ret) {
-               IOAT_PMD_ERR("Failed to create rawdev %s", name);
-               return ret;
-       }
-
-       return 0;
-}
-
-static int
-idxd_rawdev_remove_vdev(struct rte_vdev_device *vdev)
-{
-       struct idxd_rawdev *idxd;
-       const char *name;
-       struct rte_rawdev *rdev;
-       int ret = 0;
-
-       name = rte_vdev_device_name(vdev);
-       if (name == NULL)
-               return -EINVAL;
-
-       IOAT_PMD_INFO("Remove DSA vdev %p", name);
-
-       rdev = rte_rawdev_pmd_get_named_dev(name);
-       if (!rdev) {
-               IOAT_PMD_ERR("Invalid device name (%s)", name);
-               return -EINVAL;
-       }
-
-       idxd = rdev->dev_private;
-
-       /* free context and memory */
-       if (rdev->dev_private != NULL) {
-               IOAT_PMD_DEBUG("Freeing device driver memory");
-               rdev->dev_private = NULL;
-
-               if (munmap(idxd->public.portal, 0x1000) < 0) {
-                       IOAT_PMD_ERR("Error unmapping portal");
-                       ret = -errno;
-               }
-
-               rte_free(idxd->public.batch_ring);
-               rte_free(idxd->public.hdl_ring);
-
-               rte_memzone_free(idxd->mz);
-       }
-
-       if (rte_rawdev_pmd_release(rdev))
-               IOAT_PMD_ERR("Device cleanup failed");
-
-       return ret;
-}
-
-struct rte_vdev_driver idxd_rawdev_drv_vdev = {
-       .probe = idxd_rawdev_probe_vdev,
-       .remove = idxd_rawdev_remove_vdev,
-};
-
-RTE_PMD_REGISTER_VDEV(IDXD_PMD_RAWDEV_NAME, idxd_rawdev_drv_vdev);
-RTE_PMD_REGISTER_PARAM_STRING(IDXD_PMD_RAWDEV_NAME,
-                             "wq=<string>");
index 3b8ea65..6382a82 100644 (file)
@@ -4,13 +4,13 @@
 build = dpdk_conf.has('RTE_ARCH_X86')
 reason = 'only supported on x86'
 sources = files(
+        'idxd_bus.c',
         'idxd_pci.c',
-        'idxd_vdev.c',
         'ioat_common.c',
         'ioat_rawdev.c',
         'ioat_rawdev_test.c',
 )
-deps += ['bus_pci', 'bus_vdev', 'mbuf', 'rawdev']
+deps += ['bus_pci', 'mbuf', 'rawdev']
 headers = files(
         'rte_ioat_rawdev.h',
         'rte_ioat_rawdev_fns.h',