ethdev: add port ownership
authorMatan Azrad <matan@mellanox.com>
Mon, 22 Jan 2018 16:38:19 +0000 (16:38 +0000)
committerThomas Monjalon <thomas@monjalon.net>
Wed, 31 Jan 2018 19:48:53 +0000 (20:48 +0100)
The ownership of a port is implicit in DPDK.
Making it explicit is better for the following reasons:
1. It will define well who is in charge of the port usage synchronization.
2. A library could work on top of a port.
3. A port can work on top of another port.

Also, in the fail-safe case, an issue has been encountered in testpmd:
we need to check that the application is not trying to use a port which
is already managed by fail-safe.

A port owner is defined by an owner id (number) and an owner name (string).
The owner id must be unique, to distinguish between two identical entity
instances, while the owner name can be any name.
The name helps different DPDK entities to logically recognize the owner
and allows easy debugging.
Each DPDK entity can allocate a unique owner identifier and can use it
and its preferred name to own valid ethdev ports.
Each DPDK entity can get any port owner status to decide if it can
manage the port or not.

The mechanism is synchronized for both the primary process threads and
the secondary process threads, to allow a secondary process entity to be
a port owner.

Add a synchronized ownership mechanism to DPDK Ethernet devices to
avoid multiple management of a device by different DPDK entities.

The current ethdev internal port management is not affected by this
feature.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
doc/guides/prog_guide/poll_mode_drv.rst
lib/librte_ether/rte_ethdev.c
lib/librte_ether/rte_ethdev.h
lib/librte_ether/rte_ethdev_core.h
lib/librte_ether/rte_ethdev_version.map

index d1d4b1c..d513ee3 100644 (file)
@@ -156,8 +156,8 @@ concurrently on the same tx queue without SW lock. This PMD feature found in som
 
 See `Hardware Offload`_ for ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability probing details.
 
-Device Identification and Configuration
----------------------------------------
+Device Identification, Ownership and Configuration
+--------------------------------------------------
 
 Device Identification
 ~~~~~~~~~~~~~~~~~~~~~
@@ -171,6 +171,16 @@ Based on their PCI identifier, NIC ports are assigned two other identifiers:
 *   A port name used to designate the port in console messages, for administration or debugging purposes.
     For ease of use, the port name includes the port index.
 
+Port Ownership
+~~~~~~~~~~~~~~
+The Ethernet device ports can be owned by a single DPDK entity (application, library, PMD, process, etc).
+The ownership mechanism is controlled by ethdev APIs and allows DPDK entities to set, remove and get a port owner.
+This prevents an Ethernet port from being managed by several entities at the same time.
+
+.. note::
+
+    It is the responsibility of the DPDK entity to set the port owner before using the port and to manage the port usage synchronization between different threads or processes.
+
 Device Configuration
 ~~~~~~~~~~~~~~~~~~~~
 
index e610d62..b1d38cf 100644 (file)
@@ -43,7 +43,6 @@
 
 static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
 struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
-static struct rte_eth_dev_data *rte_eth_dev_data;
 static uint8_t eth_dev_last_created_port;
 
 /* spinlock for eth device callbacks */
@@ -55,12 +54,22 @@ static rte_spinlock_t rte_eth_rx_cb_lock = RTE_SPINLOCK_INITIALIZER;
 /* spinlock for add/remove tx callbacks */
 static rte_spinlock_t rte_eth_tx_cb_lock = RTE_SPINLOCK_INITIALIZER;
 
+/* spinlock for shared data allocation */
+static rte_spinlock_t rte_eth_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
 /* store statistics names and its offset in stats structure  */
 struct rte_eth_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned offset;
 };
 
+/* Shared memory between primary and secondary processes. */
+static struct {
+       uint64_t next_owner_id; /* Next owner id to hand out. */
+       rte_spinlock_t ownership_lock; /* Guards owner ids and port owners. */
+       struct rte_eth_dev_data data[RTE_MAX_ETHPORTS]; /* Indexed by port id. */
+} *rte_eth_dev_shared_data;
+
 static const struct rte_eth_xstats_name_off rte_stats_strings[] = {
        {"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)},
        {"tx_good_packets", offsetof(struct rte_eth_stats, opackets)},
@@ -182,24 +191,35 @@ rte_eth_find_next(uint16_t port_id)
 }
 
 static void
-rte_eth_dev_data_alloc(void)
+rte_eth_dev_shared_data_prepare(void)
 {
        const unsigned flags = 0;
        const struct rte_memzone *mz;
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-               mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA,
-                               RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data),
-                               rte_socket_id(), flags);
-       } else
-               mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA);
-       if (mz == NULL)
-               rte_panic("Cannot allocate memzone for ethernet port data\n");
+       rte_spinlock_lock(&rte_eth_shared_data_lock);
+
+       if (rte_eth_dev_shared_data == NULL) {
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+                       /* Allocate port data and ownership shared memory. */
+                       mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA,
+                                       sizeof(*rte_eth_dev_shared_data),
+                                       rte_socket_id(), flags);
+               } else
+                       mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA);
+               if (mz == NULL)
+                       rte_panic("Cannot allocate ethdev shared data\n");
+
+               rte_eth_dev_shared_data = mz->addr;
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+                       rte_eth_dev_shared_data->next_owner_id =
+                                       RTE_ETH_DEV_NO_OWNER + 1;
+                       rte_spinlock_init(&rte_eth_dev_shared_data->ownership_lock);
+                       memset(rte_eth_dev_shared_data->data, 0,
+                              sizeof(rte_eth_dev_shared_data->data));
+               }
+       }
 
-       rte_eth_dev_data = mz->addr;
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               memset(rte_eth_dev_data, 0,
-                               RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data));
+       rte_spinlock_unlock(&rte_eth_shared_data_lock);
 }
 
 struct rte_eth_dev *
@@ -222,7 +242,7 @@ rte_eth_dev_find_free_port(void)
 
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                /* Using shared name field to find a free port. */
-               if (rte_eth_dev_data[i].name[0] == '\0') {
+               if (rte_eth_dev_shared_data->data[i].name[0] == '\0') {
                        RTE_ASSERT(rte_eth_devices[i].state ==
                                   RTE_ETH_DEV_UNUSED);
                        return i;
@@ -236,7 +256,7 @@ eth_dev_get(uint16_t port_id)
 {
        struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id];
 
-       eth_dev->data = &rte_eth_dev_data[port_id];
+       eth_dev->data = &rte_eth_dev_shared_data->data[port_id];
        eth_dev->state = RTE_ETH_DEV_ATTACHED;
 
        eth_dev_last_created_port = port_id;
@@ -256,8 +276,7 @@ rte_eth_dev_allocate(const char *name)
                return NULL;
        }
 
-       if (rte_eth_dev_data == NULL)
-               rte_eth_dev_data_alloc();
+       rte_eth_dev_shared_data_prepare();
 
        if (rte_eth_dev_allocated(name) != NULL) {
                RTE_PMD_DEBUG_TRACE("Ethernet Device with name %s already allocated!\n",
@@ -286,11 +305,10 @@ rte_eth_dev_attach_secondary(const char *name)
        uint16_t i;
        struct rte_eth_dev *eth_dev;
 
-       if (rte_eth_dev_data == NULL)
-               rte_eth_dev_data_alloc();
+       rte_eth_dev_shared_data_prepare();
 
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
-               if (strcmp(rte_eth_dev_data[i].name, name) == 0)
+               if (strcmp(rte_eth_dev_shared_data->data[i].name, name) == 0)
                        break;
        }
        if (i == RTE_MAX_ETHPORTS) {
@@ -312,9 +330,16 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
        if (eth_dev == NULL)
                return -EINVAL;
 
-       memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+       rte_eth_dev_shared_data_prepare();
+
+       rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
        eth_dev->state = RTE_ETH_DEV_UNUSED;
 
+       memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+
+       rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+
        _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
 
        return 0;
@@ -330,6 +355,154 @@ rte_eth_dev_is_valid_port(uint16_t port_id)
                return 1;
 }
 
+/*
+ * Check whether owner_id can designate a real owner: it must differ from
+ * RTE_ETH_DEV_NO_OWNER and be lower than the shared next_owner_id counter.
+ * Returns 1 when the identifier is valid, 0 otherwise.
+ */
+static int
+rte_eth_is_valid_owner_id(uint64_t owner_id)
+{
+       if (owner_id == RTE_ETH_DEV_NO_OWNER ||
+           rte_eth_dev_shared_data->next_owner_id <= owner_id) {
+               /* uint64_t is not "unsigned long" on 32-bit targets. */
+               RTE_PMD_DEBUG_TRACE("Invalid owner_id=%016llX.\n",
+                                   (unsigned long long)owner_id);
+               return 0;
+       }
+       return 1;
+}
+
+/*
+ * Return the first port id, starting at port_id, whose device is in the
+ * ATTACHED or REMOVED state and whose owner id equals owner_id.
+ * Returns RTE_MAX_ETHPORTS when no such port exists.
+ */
+uint64_t __rte_experimental
+rte_eth_find_next_owned_by(uint16_t port_id, const uint64_t owner_id)
+{
+       for (; port_id < RTE_MAX_ETHPORTS; port_id++) {
+               const struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+               /* Skip slots that are neither attached nor removed. */
+               if (dev->state != RTE_ETH_DEV_ATTACHED &&
+                   dev->state != RTE_ETH_DEV_REMOVED)
+                       continue;
+
+               if (dev->data->owner.id == owner_id)
+                       return port_id;
+       }
+
+       return RTE_MAX_ETHPORTS;
+}
+
+/*
+ * Hand out the next unused owner identifier from the shared counter.
+ * The counter is read and advanced under ownership_lock.
+ * Returns 0 on success, -EINVAL when owner_id is NULL.
+ */
+int __rte_experimental
+rte_eth_dev_owner_new(uint64_t *owner_id)
+{
+       /* Reject a NULL output pointer instead of dereferencing it. */
+       if (owner_id == NULL)
+               return -EINVAL;
+
+       rte_eth_dev_shared_data_prepare();
+
+       rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+       *owner_id = rte_eth_dev_shared_data->next_owner_id++;
+
+       rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+       return 0;
+}
+
+/*
+ * Replace the owner of a port, provided its current owner id is old_owner_id.
+ * The callers in this file invoke it with ownership_lock held.
+ *
+ * Returns 0 on success, -EINVAL when new_owner is NULL or when neither the
+ * new nor the old owner id is valid, and -EPERM when the port is currently
+ * owned by an id other than old_owner_id. An invalid port_id is rejected by
+ * RTE_ETH_VALID_PORTID_OR_ERR_RET with -ENODEV.
+ */
+static int
+_rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
+                      const struct rte_eth_dev_owner *new_owner)
+{
+       struct rte_eth_dev_owner *port_owner;
+       int sret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+       /* Reject a NULL owner instead of dereferencing it below. */
+       if (new_owner == NULL)
+               return -EINVAL;
+
+       /*
+        * A set passes RTE_ETH_DEV_NO_OWNER as the old id and an unset passes
+        * it as the new id, so only one of the two ids has to be valid.
+        */
+       if (!rte_eth_is_valid_owner_id(new_owner->id) &&
+           !rte_eth_is_valid_owner_id(old_owner_id))
+               return -EINVAL;
+
+       port_owner = &rte_eth_devices[port_id].data->owner;
+       if (port_owner->id != old_owner_id) {
+               /* uint64_t is not "unsigned long" on 32-bit targets. */
+               RTE_PMD_DEBUG_TRACE("Cannot set owner to port %d already owned"
+                                   " by %s_%016llX.\n", port_id,
+                                   port_owner->name,
+                                   (unsigned long long)port_owner->id);
+               return -EPERM;
+       }
+
+       /* A truncated name is only reported; the owner is still updated. */
+       sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s",
+                       new_owner->name);
+       if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN)
+               RTE_PMD_DEBUG_TRACE("Port %d owner name was truncated.\n",
+                                   port_id);
+
+       port_owner->id = new_owner->id;
+
+       RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%016llX.\n", port_id,
+                           new_owner->name,
+                           (unsigned long long)new_owner->id);
+
+       return 0;
+}
+
+/*
+ * Claim a port for the given owner.
+ * The transfer runs under ownership_lock and is requested from
+ * RTE_ETH_DEV_NO_OWNER, so it only succeeds on a port that has no owner.
+ * Returns the status of _rte_eth_dev_owner_set().
+ */
+int __rte_experimental
+rte_eth_dev_owner_set(const uint16_t port_id,
+                     const struct rte_eth_dev_owner *owner)
+{
+       rte_spinlock_t *lock;
+       int status;
+
+       rte_eth_dev_shared_data_prepare();
+
+       /* The shared data pointer is only usable after the prepare call. */
+       lock = &rte_eth_dev_shared_data->ownership_lock;
+
+       rte_spinlock_lock(lock);
+       status = _rte_eth_dev_owner_set(port_id, RTE_ETH_DEV_NO_OWNER, owner);
+       rte_spinlock_unlock(lock);
+
+       return status;
+}
+
+/*
+ * Make a port ownerless, provided it is currently owned by owner_id.
+ * The transfer to an empty owner (RTE_ETH_DEV_NO_OWNER, empty name) runs
+ * under ownership_lock.
+ * Returns the status of _rte_eth_dev_owner_set().
+ */
+int __rte_experimental
+rte_eth_dev_owner_unset(const uint16_t port_id, const uint64_t owner_id)
+{
+       const struct rte_eth_dev_owner no_owner = {
+               .id = RTE_ETH_DEV_NO_OWNER,
+               .name = "",
+       };
+       rte_spinlock_t *lock;
+       int status;
+
+       rte_eth_dev_shared_data_prepare();
+
+       /* The shared data pointer is only usable after the prepare call. */
+       lock = &rte_eth_dev_shared_data->ownership_lock;
+
+       rte_spinlock_lock(lock);
+       status = _rte_eth_dev_owner_set(port_id, owner_id, &no_owner);
+       rte_spinlock_unlock(lock);
+
+       return status;
+}
+
+/*
+ * Strip ownership from every port currently owned by owner_id.
+ * The whole scan runs under ownership_lock. An invalid owner_id leaves all
+ * ports untouched.
+ */
+void __rte_experimental
+rte_eth_dev_owner_delete(const uint64_t owner_id)
+{
+       uint16_t port_id;
+
+       rte_eth_dev_shared_data_prepare();
+
+       rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+       if (rte_eth_is_valid_owner_id(owner_id)) {
+               /* Zeroing the owner sets its id to RTE_ETH_DEV_NO_OWNER. */
+               RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, owner_id)
+                       memset(&rte_eth_devices[port_id].data->owner, 0,
+                              sizeof(struct rte_eth_dev_owner));
+               /* owner_id is 64 bits wide; "%016X" expects unsigned int. */
+               RTE_PMD_DEBUG_TRACE("All port owners owned by %016llX"
+                                   " identifier have removed.\n",
+                                   (unsigned long long)owner_id);
+       }
+
+       rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+}
+
+/*
+ * Copy the current owner of a port into *owner.
+ * The copy is taken under ownership_lock.
+ * Returns 0 on success, -ENODEV when port_id is not a valid port (in which
+ * case *owner is left untouched).
+ */
+int __rte_experimental
+rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner)
+{
+       int status = -ENODEV;
+
+       rte_eth_dev_shared_data_prepare();
+
+       rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+       if (rte_eth_dev_is_valid_port(port_id)) {
+               rte_memcpy(owner, &rte_eth_devices[port_id].data->owner,
+                          sizeof(*owner));
+               status = 0;
+       } else {
+               RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+       }
+
+       rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+       return status;
+}
+
 int
 rte_eth_dev_socket_id(uint16_t port_id)
 {
@@ -372,7 +545,7 @@ rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
 
        /* shouldn't check 'rte_eth_devices[i].data',
         * because it might be overwritten by VDEV PMD */
-       tmp = rte_eth_dev_data[port_id].name;
+       tmp = rte_eth_dev_shared_data->data[port_id].name;
        strcpy(name, tmp);
        return 0;
 }
@@ -380,22 +553,22 @@ rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
 int
 rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
 {
-       int i;
+       uint32_t pid;
 
        if (name == NULL) {
                RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
                return -EINVAL;
        }
 
-       RTE_ETH_FOREACH_DEV(i) {
-               if (!strncmp(name,
-                       rte_eth_dev_data[i].name, strlen(name))) {
-
-                       *port_id = i;
-
+       for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
+               if (rte_eth_devices[pid].state != RTE_ETH_DEV_UNUSED &&
+                   !strncmp(name, rte_eth_dev_shared_data->data[pid].name,
+                            strlen(name))) {
+                       *port_id = pid;
                        return 0;
                }
        }
+
        return -ENODEV;
 }
 
index e936afb..0361533 100644 (file)
@@ -1218,6 +1218,15 @@ struct rte_eth_dev_sriov {
 
 #define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
 
+#define RTE_ETH_DEV_NO_OWNER 0
+
+#define RTE_ETH_MAX_OWNER_NAME_LEN 64
+
+struct rte_eth_dev_owner {
+       uint64_t id; /**< The owner unique identifier. */
+       char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */
+};
+
 /** Device supports link state interrupt */
 #define RTE_ETH_DEV_INTR_LSC     0x0002
 /** Device is a bonded slave */
@@ -1225,6 +1234,31 @@ struct rte_eth_dev_sriov {
 /** Device supports device removal interrupt */
 #define RTE_ETH_DEV_INTR_RMV     0x0008
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterates over valid ethdev ports owned by a specific owner.
+ *
+ * @param port_id
+ *   The id of the next possible valid owned port.
+ * @param      owner_id
+ *  The owner identifier.
+ *  RTE_ETH_DEV_NO_OWNER means iterate over all valid ownerless ports.
+ * @return
+ *   Next valid port id owned by owner_id, RTE_MAX_ETHPORTS if there is none.
+ */
+uint64_t __rte_experimental rte_eth_find_next_owned_by(uint16_t port_id,
+               const uint64_t owner_id);
+
+/**
+ * Macro to iterate over all enabled ethdev ports owned by a specific owner.
+ */
+#define RTE_ETH_FOREACH_DEV_OWNED_BY(p, o) \
+       for (p = rte_eth_find_next_owned_by(0, o); \
+            (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS; \
+            p = rte_eth_find_next_owned_by(p + 1, o))
+
 /**
  * Iterates over valid ethdev ports.
  *
@@ -1236,12 +1270,85 @@ struct rte_eth_dev_sriov {
 uint16_t rte_eth_find_next(uint16_t port_id);
 
 /**
- * Macro to iterate over all enabled ethdev ports.
+ * Macro to iterate over all enabled and ownerless ethdev ports.
+ */
+#define RTE_ETH_FOREACH_DEV(p) \
+       RTE_ETH_FOREACH_DEV_OWNED_BY(p, RTE_ETH_DEV_NO_OWNER)
+
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get a new unique owner identifier.
+ * An owner identifier lets exactly one DPDK entity own an Ethernet device,
+ * which avoids management of the same device by several entities.
+ *
+ * @param      owner_id
+ *   Owner identifier pointer.
+ * @return
+ *   Negative errno value on error, 0 on success.
+ */
+int __rte_experimental rte_eth_dev_owner_new(uint64_t *owner_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set an Ethernet device owner.
+ *
+ * @param      port_id
+ *  The identifier of the port to own.
+ * @param      owner
+ *  The owner pointer.
+ * @return
+ *  Negative errno value on error, 0 on success.
+ */
+int __rte_experimental rte_eth_dev_owner_set(const uint16_t port_id,
+               const struct rte_eth_dev_owner *owner);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Unset Ethernet device owner to make the device ownerless.
+ *
+ * @param      port_id
+ *  The identifier of port to make ownerless.
+ * @param      owner_id
+ *  The owner identifier.
+ * @return
+ *  0 on success, negative errno value on error.
+ */
+int __rte_experimental rte_eth_dev_owner_unset(const uint16_t port_id,
+               const uint64_t owner_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Remove owner from all Ethernet devices owned by a specific owner.
+ *
+ * @param      owner_id
+ *  The owner identifier.
+ */
+void __rte_experimental rte_eth_dev_owner_delete(const uint64_t owner_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get the owner of an Ethernet device.
+ *
+ * @param      port_id
+ *  The port identifier.
+ * @param      owner
+ *  The owner structure pointer to fill.
+ * @return
+ *  0 on success, negative errno value on error.
  */
-#define RTE_ETH_FOREACH_DEV(p)                                 \
-       for (p = rte_eth_find_next(0);                          \
-            (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS;  \
-            p = rte_eth_find_next(p + 1))
+int __rte_experimental rte_eth_dev_owner_get(const uint16_t port_id,
+               struct rte_eth_dev_owner *owner);
 
 /**
  * Get the total number of Ethernet devices that have been successfully
index 3073e78..315b317 100644 (file)
@@ -548,6 +548,7 @@ struct rte_eth_dev {
 } __rte_cache_aligned;
 
 struct rte_eth_dev_sriov;
+struct rte_eth_dev_owner;
 
 /**
  * @internal
@@ -599,6 +600,7 @@ struct rte_eth_dev_data {
        int numa_node;  /**< NUMA node connection */
        struct rte_vlan_filter_conf vlan_filter_conf;
        /**< VLAN filter configuration. */
+       struct rte_eth_dev_owner owner; /**< The port owner. */
 };
 
 /**
index 5c61563..a5cb372 100644 (file)
@@ -209,8 +209,14 @@ EXPERIMENTAL {
        global:
 
        rte_eth_dev_is_removed;
+       rte_eth_dev_owner_delete;
+       rte_eth_dev_owner_get;
+       rte_eth_dev_owner_new;
+       rte_eth_dev_owner_set;
+       rte_eth_dev_owner_unset;
        rte_eth_dev_rx_offload_name;
        rte_eth_dev_tx_offload_name;
+       rte_eth_find_next_owned_by;
        rte_mtr_capabilities_get;
        rte_mtr_create;
        rte_mtr_destroy;