net/failsafe: fix sub-device ownership race
authorMatan Azrad <matan@mellanox.com>
Thu, 10 May 2018 23:58:35 +0000 (01:58 +0200)
committerFerruh Yigit <ferruh.yigit@intel.com>
Mon, 14 May 2018 21:31:53 +0000 (22:31 +0100)
There is time between the sub-device port probing by the sub-device PMD
to the sub-device port ownership taking by a fail-safe port.

In this time, the port is available for the application usage. For
example, the port will be exposed to the applications which use
RTE_ETH_FOREACH_DEV iterator.

Thus, ownership unaware applications may manage the port in this time
what may cause a lot of problematic behaviors in the fail-safe
sub-device initialization.

Register to the ethdev NEW event to take the sub-device port ownership
before it becomes exposed to the application.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org
Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
drivers/net/failsafe/failsafe.c
drivers/net/failsafe/failsafe_eal.c
drivers/net/failsafe/failsafe_ether.c
drivers/net/failsafe/failsafe_private.h

index b35471d..eafbb75 100644 (file)
@@ -204,16 +204,25 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
        }
        snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),
                 FAILSAFE_OWNER_NAME);
+       DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id,
+             priv->my_owner.name, priv->my_owner.id);
+       ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+                                           failsafe_eth_new_event_callback,
+                                           dev);
+       if (ret) {
+               ERROR("Failed to register NEW callback");
+               goto free_args;
+       }
        ret = failsafe_eal_init(dev);
        if (ret)
-               goto free_args;
+               goto unregister_new_callback;
        ret = fs_mutex_init(priv);
        if (ret)
-               goto free_args;
+               goto unregister_new_callback;
        ret = failsafe_hotplug_alarm_install(dev);
        if (ret) {
                ERROR("Could not set up plug-in event detection");
-               goto free_args;
+               goto unregister_new_callback;
        }
        mac = &dev->data->mac_addrs[0];
        if (mac_from_arg) {
@@ -263,6 +272,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
        return 0;
 cancel_alarm:
        failsafe_hotplug_alarm_cancel(dev);
+unregister_new_callback:
+       rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+                                       failsafe_eth_new_event_callback, dev);
 free_args:
        failsafe_args_free(dev);
 free_subs:
@@ -282,6 +294,8 @@ fs_rte_eth_free(const char *name)
        dev = rte_eth_dev_allocated(name);
        if (dev == NULL)
                return -ENODEV;
+       rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+                                       failsafe_eth_new_event_callback, dev);
        ret = failsafe_eal_uninit(dev);
        if (ret)
                ERROR("Error while uninitializing sub-EAL");
index ce76770..5672f39 100644 (file)
@@ -18,8 +18,9 @@ fs_ethdev_portid_get(const char *name, uint16_t *port_id)
                return -EINVAL;
        }
        len = strlen(name);
-       RTE_ETH_FOREACH_DEV(pid) {
-               if (!strncmp(name, rte_eth_devices[pid].device->name, len)) {
+       for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
+               if (rte_eth_dev_is_valid_port(pid) &&
+                   !strncmp(name, rte_eth_devices[pid].device->name, len)) {
                        *port_id = pid;
                        return 0;
                }
@@ -41,6 +42,8 @@ fs_bus_init(struct rte_eth_dev *dev)
                        continue;
                da = &sdev->devargs;
                if (fs_ethdev_portid_get(da->name, &pid) != 0) {
+                       struct rte_eth_dev_owner pid_owner;
+
                        ret = rte_eal_hotplug_add(da->bus->name,
                                                  da->name,
                                                  da->args);
@@ -55,12 +58,26 @@ fs_bus_init(struct rte_eth_dev *dev)
                                ERROR("sub_device %d init went wrong", i);
                                return -ENODEV;
                        }
+                       /*
+                        * The NEW callback tried to take ownership, check
+                        * whether it succeed or didn't.
+                        */
+                       rte_eth_dev_owner_get(pid, &pid_owner);
+                       if (pid_owner.id != PRIV(dev)->my_owner.id) {
+                               INFO("sub_device %d owner(%s_%016"PRIX64") is not my,"
+                                    " owner(%s_%016"PRIX64"), will try again later",
+                                    i, pid_owner.name, pid_owner.id,
+                                    PRIV(dev)->my_owner.name,
+                                    PRIV(dev)->my_owner.id);
+                               continue;
+                       }
                } else {
+                       /* The sub-device port was found. */
                        char devstr[DEVARGS_MAXLEN] = "";
                        struct rte_devargs *probed_da =
                                        rte_eth_devices[pid].device->devargs;
 
-                       /* Take control of device probed by EAL options. */
+                       /* Take control of probed device. */
                        free(da->args);
                        memset(da, 0, sizeof(*da));
                        if (probed_da != NULL)
@@ -77,22 +94,23 @@ fs_bus_init(struct rte_eth_dev *dev)
                        }
                        INFO("Taking control of a probed sub device"
                              " %d named %s", i, da->name);
-               }
-               ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
-               if (ret < 0) {
-                       INFO("sub_device %d owner set failed (%s),"
-                            " will try again later", i, strerror(-ret));
-                       continue;
-               } else if (strncmp(rte_eth_devices[pid].device->name, da->name,
-                          strlen(da->name)) != 0) {
-                       /*
-                        * The device probably was removed and its port id was
-                        * reallocated before ownership set.
-                        */
-                       rte_eth_dev_owner_unset(pid, PRIV(dev)->my_owner.id);
-                       INFO("sub_device %d was probably removed before taking"
-                            " ownership, will try again later", i);
-                       continue;
+                       ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
+                       if (ret < 0) {
+                               INFO("sub_device %d owner set failed (%s), "
+                                    "will try again later", i, strerror(-ret));
+                               continue;
+                       } else if (strncmp(rte_eth_devices[pid].device->name,
+                                  da->name, strlen(da->name)) != 0) {
+                               /*
+                                * The device probably was removed and its port
+                                * id was reallocated before ownership set.
+                                */
+                               rte_eth_dev_owner_unset(pid,
+                                                       PRIV(dev)->my_owner.id);
+                               INFO("sub_device %d was removed before taking"
+                                    " ownership, will try again later", i);
+                               continue;
+                       }
                }
                ETH(sdev) = &rte_eth_devices[pid];
                SUB_ID(sdev) = i;
index b414a78..733e95d 100644 (file)
@@ -463,3 +463,26 @@ failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
        else
                return 0;
 }
+
+/* Take sub-device ownership before it becomes exposed to the application. */
+int
+failsafe_eth_new_event_callback(uint16_t port_id,
+                               enum rte_eth_event_type event __rte_unused,
+                               void *cb_arg, void *out __rte_unused)
+{
+       struct rte_eth_dev *fs_dev = cb_arg;
+       struct sub_device *sdev;
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       uint8_t i;
+
+       FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
+               if (sdev->state >= DEV_PROBED)
+                       continue;
+               if (strcmp(sdev->devargs.name, dev->device->name) != 0)
+                       continue;
+               rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
+               /* The actual owner will be checked after the port probing. */
+               break;
+       }
+       return 0;
+}
index b54f8e3..7e6a3f8 100644 (file)
@@ -220,6 +220,9 @@ int failsafe_eth_rmv_event_callback(uint16_t port_id,
 int failsafe_eth_lsc_event_callback(uint16_t port_id,
                                    enum rte_eth_event_type event,
                                    void *cb_arg, void *out);
+int failsafe_eth_new_event_callback(uint16_t port_id,
+                                   enum rte_eth_event_type event,
+                                   void *cb_arg, void *out);
 
 /* GLOBALS */