devargs: fix freeing during device removal
[dpdk.git] / drivers / bus / pci / pci_common.c
index 6bed0bc..feb7980 100644 (file)
@@ -6,6 +6,7 @@
 #include <string.h>
 #include <inttypes.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/queue.h>
 #include <rte_string_fns.h>
 #include <rte_common.h>
 #include <rte_devargs.h>
+#include <rte_vfio.h>
 
 #include "private.h"
 
-extern struct rte_pci_bus rte_pci_bus;
 
 #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
 
@@ -69,7 +70,7 @@ pci_name_set(struct rte_pci_device *dev)
         */
        if (devargs != NULL)
                /* If an rte_devargs exists, the generic rte_device uses the
-                * given name as its namea
+                * given name as its name.
                 */
                dev->device.name = dev->device.devargs->name;
        else
@@ -122,6 +123,7 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
                         struct rte_pci_device *dev)
 {
        int ret;
+       bool already_probed;
        struct rte_pci_addr *loc;
 
        if ((dr == NULL) || (dev == NULL))
@@ -152,25 +154,39 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
                dev->device.numa_node = 0;
        }
 
+       already_probed = rte_dev_is_probed(&dev->device);
+       if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
+               RTE_LOG(DEBUG, EAL, "Device %s is already probed\n",
+                               dev->device.name);
+               return -EEXIST;
+       }
+
        RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
                dev->id.device_id, dr->driver.name);
 
-       if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
+       /*
+        * Reference the driver structure.
+        * This must happen before rte_pci_map_device(), as it allows the
+        * driver flags to be used for adjusting configuration.
+        */
+       if (!already_probed)
+               dev->driver = dr;
+
+       if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
                /* map resources for devices that use igb_uio */
                ret = rte_pci_map_device(dev);
-               if (ret != 0)
+               if (ret != 0) {
+                       dev->driver = NULL;
                        return ret;
+               }
        }
 
-       /* reference driver structure */
-       dev->driver = dr;
-       dev->device.driver = &dr->driver;
-
        /* call the driver probe() function */
        ret = dr->probe(dr, dev);
+       if (already_probed)
+               return ret; /* no rollback if already succeeded earlier */
        if (ret) {
                dev->driver = NULL;
-               dev->device.driver = NULL;
                if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
                        /* Don't unmap if device is unsupported and
                         * driver needs mapped resources.
@@ -178,6 +194,8 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
                        !(ret > 0 &&
                                (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
                        rte_pci_unmap_device(dev);
+       } else {
+               dev->device.driver = &dr->driver;
        }
 
        return ret;
@@ -237,10 +255,6 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
        if (dev == NULL)
                return -1;
 
-       /* Check if a driver is already loaded */
-       if (dev->driver != NULL)
-               return 0;
-
        FOREACH_DRIVER_ON_PCIBUS(dr) {
                rc = rte_pci_probe_one_driver(dr, dev);
                if (rc < 0)
@@ -254,81 +268,6 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
        return 1;
 }
 
-/*
- * Find the pci device specified by pci address, then invoke probe function of
- * the driver of the device.
- */
-int
-rte_pci_probe_one(const struct rte_pci_addr *addr)
-{
-       struct rte_pci_device *dev = NULL;
-
-       int ret = 0;
-
-       if (addr == NULL)
-               return -1;
-
-       /* update current pci device in global list, kernel bindings might have
-        * changed since last time we looked at it.
-        */
-       if (pci_update_device(addr) < 0)
-               goto err_return;
-
-       FOREACH_DEVICE_ON_PCIBUS(dev) {
-               if (rte_pci_addr_cmp(&dev->addr, addr))
-                       continue;
-
-               ret = pci_probe_all_drivers(dev);
-               if (ret)
-                       goto err_return;
-               return 0;
-       }
-       return -1;
-
-err_return:
-       RTE_LOG(WARNING, EAL,
-               "Requested device " PCI_PRI_FMT " cannot be used\n",
-               addr->domain, addr->bus, addr->devid, addr->function);
-       return -1;
-}
-
-/*
- * Detach device specified by its pci address.
- */
-int
-rte_pci_detach(const struct rte_pci_addr *addr)
-{
-       struct rte_pci_device *dev = NULL;
-       int ret = 0;
-
-       if (addr == NULL)
-               return -1;
-
-       FOREACH_DEVICE_ON_PCIBUS(dev) {
-               if (rte_pci_addr_cmp(&dev->addr, addr))
-                       continue;
-
-               ret = rte_pci_detach_dev(dev);
-               if (ret < 0)
-                       /* negative value is an error */
-                       goto err_return;
-               if (ret > 0)
-                       /* positive value means driver doesn't support it */
-                       continue;
-
-               rte_pci_remove_device(dev);
-               free(dev);
-               return 0;
-       }
-       return -1;
-
-err_return:
-       RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
-                       " cannot be used\n", dev->addr.domain, dev->addr.bus,
-                       dev->addr.devid, dev->addr.function);
-       return -1;
-}
-
 /*
  * Scan the content of the PCI bus, and call the probe() function for
  * all registered drivers that have a matching entry in its id_table
@@ -357,11 +296,14 @@ rte_pci_probe(void)
                        devargs->policy == RTE_DEV_WHITELISTED)
                        ret = pci_probe_all_drivers(dev);
                if (ret < 0) {
-                       RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
-                                " cannot be used\n", dev->addr.domain, dev->addr.bus,
-                                dev->addr.devid, dev->addr.function);
-                       rte_errno = errno;
-                       failed++;
+                       if (ret != -EEXIST) {
+                               RTE_LOG(ERR, EAL, "Requested device "
+                                       PCI_PRI_FMT " cannot be used\n",
+                                       dev->addr.domain, dev->addr.bus,
+                                       dev->addr.devid, dev->addr.function);
+                               rte_errno = errno;
+                               failed++;
+                       }
                        ret = 0;
                }
        }
@@ -445,7 +387,7 @@ rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
 }
 
 /* Remove a device from PCI bus */
-void
+static void
 rte_pci_remove_device(struct rte_pci_device *pci_dev)
 {
        TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
@@ -455,20 +397,115 @@ static struct rte_device *
 pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
                const void *data)
 {
-       struct rte_pci_device *dev;
+       const struct rte_pci_device *pstart;
+       struct rte_pci_device *pdev;
 
-       FOREACH_DEVICE_ON_PCIBUS(dev) {
-               if (start && &dev->device == start) {
-                       start = NULL; /* starting point found */
-                       continue;
-               }
-               if (cmp(&dev->device, data) == 0)
-                       return &dev->device;
+       if (start != NULL) {
+               pstart = RTE_DEV_TO_PCI_CONST(start);
+               pdev = TAILQ_NEXT(pstart, next);
+       } else {
+               pdev = TAILQ_FIRST(&rte_pci_bus.device_list);
        }
+       while (pdev != NULL) {
+               if (cmp(&pdev->device, data) == 0)
+                       return &pdev->device;
+               pdev = TAILQ_NEXT(pdev, next);
+       }
+       return NULL;
+}
 
+/*
+ * Find the device that hit the failure: iterate over all devices on the
+ * PCI bus and return the first one with a memory resource (BAR) range
+ * [addr, addr + len) containing failure_addr, or NULL if none matches.
+ */
+static struct rte_pci_device *
+pci_find_device_by_addr(const void *failure_addr)
+{
+       struct rte_pci_device *pdev = NULL;
+       uint64_t check_point, start, end, len;
+       int i;
+
+       check_point = (uint64_t)(uintptr_t)failure_addr;
+
+       FOREACH_DEVICE_ON_PCIBUS(pdev) {
+               for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
+                       start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr;
+                       len = pdev->mem_resource[i].len;
+                       end = start + len;
+                       if (check_point >= start && check_point < end) {
+                               RTE_LOG(DEBUG, EAL, "Failure address %16.16"
+                                       PRIx64" belongs to device %s!\n",
+                                       check_point, pdev->device.name);
+                               return pdev;
+                       }
+               }
+       }
        return NULL;
 }
 
+static int
+pci_hot_unplug_handler(struct rte_device *dev)
+{
+       struct rte_pci_device *pdev = NULL;
+       int ret = 0;
+
+       pdev = RTE_DEV_TO_PCI(dev);
+       if (!pdev)
+               return -1;
+
+       switch (pdev->kdrv) {
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       case RTE_KDRV_VFIO:
+               /*
+                * The vfio kernel module guarantees that the PCI device is
+                * not deleted until user space releases its resources, so
+                * there is no need to remap the BARs here; just notify user
+                * space of the removal request so that it can handle it.
+                */
+               rte_dev_event_callback_process(dev->name,
+                                              RTE_DEV_EVENT_REMOVE);
+               break;
+#endif
+       case RTE_KDRV_IGB_UIO:
+       case RTE_KDRV_UIO_GENERIC:
+       case RTE_KDRV_NIC_UIO:
+               /* The BAR resources are invalid; remap them to be safe. */
+               ret = pci_uio_remap_resource(pdev);
+               break;
+       default:
+               RTE_LOG(DEBUG, EAL,
+                       "Not managed by a supported kernel driver, skipped\n");
+               ret = -1;
+               break;
+       }
+
+       return ret;
+}
+
+static int
+pci_sigbus_handler(const void *failure_addr)
+{
+       struct rte_pci_device *pdev = NULL;
+       int ret = 0;
+
+       pdev = pci_find_device_by_addr(failure_addr);
+       if (!pdev) {
+               /* Generic sigbus: no PCI device owns this address. */
+               ret = 1;
+       } else {
+               /* The sigbus error is caused by hot-unplug. */
+               ret = pci_hot_unplug_handler(&pdev->device);
+               if (ret) {
+                       RTE_LOG(ERR, EAL,
+                               "Failed to handle hot-unplug for device %s\n",
+                               pdev->name);
+                       ret = -1;
+               }
+       }
+       return ret;
+}
+
 static int
 pci_plug(struct rte_device *dev)
 {
@@ -485,6 +522,7 @@ pci_unplug(struct rte_device *dev)
        ret = rte_pci_detach_dev(pdev);
        if (ret == 0) {
                rte_pci_remove_device(pdev);
+               rte_devargs_remove(dev->devargs);
                free(pdev);
        }
        return ret;
@@ -499,6 +537,9 @@ struct rte_pci_bus rte_pci_bus = {
                .unplug = pci_unplug,
                .parse = pci_parse,
                .get_iommu_class = rte_pci_get_iommu_class,
+               .dev_iterate = rte_pci_dev_iterate,
+               .hot_unplug_handler = pci_hot_unplug_handler,
+               .sigbus_handler = pci_sigbus_handler,
        },
        .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
        .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),