net/bonding: support flow API
authorMatan Azrad <matan@mellanox.com>
Wed, 28 Mar 2018 11:16:01 +0000 (11:16 +0000)
committerFerruh Yigit <ferruh.yigit@intel.com>
Fri, 27 Apr 2018 16:34:42 +0000 (17:34 +0100)
Ethernet devices which are grouped by bonding PMD, aka slaves, are
sharing the same queues and RSS configurations and their Rx burst
functions must be managed by the bonding PMD according to the bonding
architecture.

So, it makes sense to configure the same flow rules for all the bond
slaves to allow consistency in packet flow management.

Add rte flow support to the bonding PMD to manage all flow
configuration to the bonded slaves.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Declan Doherty <declan.doherty@intel.com>
doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
drivers/net/bonding/Makefile
drivers/net/bonding/meson.build
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_flow.c [new file with mode: 0644]
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/bonding/rte_eth_bond_private.h

index 0da1e63..56abee5 100644 (file)
@@ -148,6 +148,8 @@ the RX and TX queues are also reconfigured using ``rte_eth_tx_queue_setup`` /
 ``rte_eth_rx_queue_setup`` with the parameters use to configure the bonding
 device. If RSS is enabled for bonding device, this mode is also enabled on new
 slave and configured as well.
+Any flow which was configured to the bond device also is configured to the added
+slave.
 
 Setting up multi-queue mode for bonding device to RSS, makes it fully
 RSS-capable, so all slaves are synchronized with its configuration. This mode is
@@ -166,6 +168,43 @@ it can be easily used as a pattern providing expected behavior, even if slave
 RETAs' sizes are different. If RSS Key is not set for bonded device, it's not
 changed on the slaves and default key for device is used.
 
+As RSS configurations, there is flow consistency in the bonded slaves for the
+next rte flow operations:
+
+Validate:
+       - Validate flow for each slave, failure at least for one slave causes to
+         bond validation failure.
+
+Create:
+       - Create the flow in all slaves.
+       - Save all the slaves created flows objects in bonding internal flow
+         structure.
+       - Failure in flow creation for existed slave rejects the flow.
+       - Failure in flow creation for new slaves in slave adding time rejects
+         the slave.
+
+Destroy:
+       - Destroy the flow in all slaves and release the bond internal flow
+         memory.
+
+Flush:
+       - Destroy all the bonding PMD flows in all the slaves.
+
+.. note::
+
+    Don't call slaves flush directly, It destroys all the slave flows which
+    may include external flows or the bond internal LACP flow.
+
+Query:
+       - Summarize flow counters from all the slaves, relevant only for
+         ``RTE_FLOW_ACTION_TYPE_COUNT``.
+
+Isolate:
+       - Call to flow isolate for all slaves.
+       - Failure in flow isolation for existed slave rejects the isolate mode.
+       - Failure in flow isolation for new slaves in slave adding time rejects
+         the slave.
+
 All settings are managed through the bonding port API and always are propagated
 in one direction (from bonding to slaves).
 
@@ -207,8 +246,8 @@ common hash function available for each of them. Changing RSS key is only
 possible, when all slave devices support the same key size.
 
 To prevent inconsistency on how slaves process packets, once a device is added
-to a bonding device, RSS configuration should be managed through the bonding
-device API, and not directly on the slave.
+to a bonding device, RSS and rte flow configurations should be managed through
+the bonding device API, and not directly on the slave.
 
 Like all other PMD, all functions exported by a PMD are lock-free functions
 that are assumed not to be invoked in parallel on different logical cores to
index 44353a1..1893e3c 100644 (file)
@@ -28,6 +28,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_flow.c
 
 #
 # Export include files
index 98fdd37..b644f35 100644 (file)
@@ -3,7 +3,7 @@
 
 name = 'bond' #, james bond :-)
 version = 2
-sources = files('rte_eth_bond_api.c', 'rte_eth_bond_pmd.c',
+sources = files('rte_eth_bond_api.c', 'rte_eth_bond_pmd.c', 'rte_eth_bond_flow.c',
        'rte_eth_bond_args.c', 'rte_eth_bond_8023ad.c', 'rte_eth_bond_alb.c')
 
 deps += 'sched' # needed for rte_bitmap.h
index aa89425..d558df8 100644 (file)
@@ -226,6 +226,49 @@ slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id)
        return res;
 }
 
+static int
+slave_rte_flow_prepare(uint16_t slave_id, struct bond_dev_private *internals)
+{
+       struct rte_flow *flow;
+       struct rte_flow_error ferror;
+       uint16_t slave_port_id = internals->slaves[slave_id].port_id;
+
+       if (internals->flow_isolated_valid != 0) {
+               rte_eth_dev_stop(slave_port_id);
+               if (rte_flow_isolate(slave_port_id, internals->flow_isolated,
+                   &ferror)) {
+                       RTE_BOND_LOG(ERR, "rte_flow_isolate failed for slave"
+                                    " %d: %s", slave_id, ferror.message ?
+                                    ferror.message : "(no stated reason)");
+                       return -1;
+               }
+       }
+       TAILQ_FOREACH(flow, &internals->flow_list, next) {
+               flow->flows[slave_id] = rte_flow_create(slave_port_id,
+                                                       &flow->fd->attr,
+                                                       flow->fd->items,
+                                                       flow->fd->actions,
+                                                       &ferror);
+               if (flow->flows[slave_id] == NULL) {
+                       RTE_BOND_LOG(ERR, "Cannot create flow for slave"
+                                    " %d: %s", slave_id,
+                                    ferror.message ? ferror.message :
+                                    "(no stated reason)");
+                       /* Destroy successful bond flows from the slave */
+                       TAILQ_FOREACH(flow, &internals->flow_list, next) {
+                               if (flow->flows[slave_id] != NULL) {
+                                       rte_flow_destroy(slave_port_id,
+                                                        flow->flows[slave_id],
+                                                        &ferror);
+                                       flow->flows[slave_id] = NULL;
+                               }
+                       }
+                       return -1;
+               }
+       }
+       return 0;
+}
+
 static int
 __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
 {
@@ -324,6 +367,12 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
                        internals->flow_type_rss_offloads;
 
+       if (slave_rte_flow_prepare(internals->slave_count, internals) != 0) {
+               RTE_BOND_LOG(ERR, "Failed to prepare new slave flows: port=%d",
+                            slave_port_id);
+               return -1;
+       }
+
        internals->slave_count++;
 
        if (bonded_eth_dev->data->dev_started) {
@@ -401,6 +450,8 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id,
        struct rte_eth_dev *bonded_eth_dev;
        struct bond_dev_private *internals;
        struct rte_eth_dev *slave_eth_dev;
+       struct rte_flow_error flow_error;
+       struct rte_flow *flow;
        int i, slave_idx;
 
        bonded_eth_dev = &rte_eth_devices[bonded_port_id];
@@ -440,6 +491,18 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id,
        rte_eth_dev_default_mac_addr_set(slave_port_id,
                        &(internals->slaves[slave_idx].persisted_mac_addr));
 
+       /*
+        * Remove bond device flows from slave device.
+        * Note: don't restore flow isolate mode.
+        */
+       TAILQ_FOREACH(flow, &internals->flow_list, next) {
+               if (flow->flows[slave_idx] != NULL) {
+                       rte_flow_destroy(slave_port_id, flow->flows[slave_idx],
+                                        &flow_error);
+                       flow->flows[slave_idx] = NULL;
+               }
+       }
+
        slave_eth_dev = &rte_eth_devices[slave_port_id];
        slave_remove(internals, slave_eth_dev);
        slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE);
diff --git a/drivers/net/bonding/rte_eth_bond_flow.c b/drivers/net/bonding/rte_eth_bond_flow.c
new file mode 100644 (file)
index 0000000..8093c04
--- /dev/null
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+
+#include "rte_eth_bond_private.h"
+
+static struct rte_flow *
+bond_flow_alloc(int numa_node, const struct rte_flow_attr *attr,
+                  const struct rte_flow_item *items,
+                  const struct rte_flow_action *actions)
+{
+       struct rte_flow *flow;
+       size_t fdsz;
+
+       fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+       flow = rte_zmalloc_socket(NULL, sizeof(struct rte_flow) + fdsz,
+                                 RTE_CACHE_LINE_SIZE, numa_node);
+       if (unlikely(flow == NULL)) {
+               RTE_BOND_LOG(ERR, "Could not allocate new flow");
+               return NULL;
+       }
+       flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+       if (unlikely(rte_flow_copy(flow->fd, fdsz, attr, items, actions) !=
+                    fdsz)) {
+               RTE_BOND_LOG(ERR, "Failed to copy flow description");
+               rte_free(flow);
+               return NULL;
+       }
+       return flow;
+}
+
+static void
+bond_flow_release(struct rte_flow **flow)
+{
+       rte_free(*flow);
+       *flow = NULL;
+}
+
+static int
+bond_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+                  const struct rte_flow_item patterns[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *err)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       int i;
+       int ret;
+
+       for (i = 0; i < internals->slave_count; i++) {
+               ret = rte_flow_validate(internals->slaves[i].port_id, attr,
+                                       patterns, actions, err);
+               if (ret) {
+                       RTE_BOND_LOG(ERR, "Operation rte_flow_validate failed"
+                                    " for slave %d with error %d", i, ret);
+                       return ret;
+               }
+       }
+       return 0;
+}
+
+static struct rte_flow *
+bond_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+                const struct rte_flow_item patterns[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *err)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct rte_flow *flow;
+       int i;
+
+       flow = bond_flow_alloc(dev->data->numa_node, attr, patterns, actions);
+       if (unlikely(flow == NULL)) {
+               rte_flow_error_set(err, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL, rte_strerror(ENOMEM));
+               return NULL;
+       }
+       for (i = 0; i < internals->slave_count; i++) {
+               flow->flows[i] = rte_flow_create(internals->slaves[i].port_id,
+                                                attr, patterns, actions, err);
+               if (unlikely(flow->flows[i] == NULL)) {
+                       RTE_BOND_LOG(ERR, "Failed to create flow on slave %d",
+                                    i);
+                       goto err;
+               }
+       }
+       TAILQ_INSERT_TAIL(&internals->flow_list, flow, next);
+       return flow;
+err:
+       /* Destroy all slaves flows. */
+       for (i = 0; i < internals->slave_count; i++) {
+               if (flow->flows[i] != NULL)
+                       rte_flow_destroy(internals->slaves[i].port_id,
+                                        flow->flows[i], err);
+       }
+       bond_flow_release(&flow);
+       return NULL;
+}
+
+static int
+bond_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
+                 struct rte_flow_error *err)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       int i;
+       int ret = 0;
+
+       for (i = 0; i < internals->slave_count; i++) {
+               int lret;
+
+               if (unlikely(flow->flows[i] == NULL))
+                       continue;
+               lret = rte_flow_destroy(internals->slaves[i].port_id,
+                                       flow->flows[i], err);
+               if (unlikely(lret != 0)) {
+                       RTE_BOND_LOG(ERR, "Failed to destroy flow on slave %d:"
+                                    " %d", i, lret);
+                       ret = lret;
+               }
+       }
+       TAILQ_REMOVE(&internals->flow_list, flow, next);
+       bond_flow_release(&flow);
+       return ret;
+}
+
+static int
+bond_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *err)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct rte_flow *flow;
+       void *tmp;
+       int ret = 0;
+       int lret;
+
+       /* Destroy all bond flows from its slaves instead of flushing them to
+        * keep the LACP flow or any other external flows.
+        */
+       TAILQ_FOREACH_SAFE(flow, &internals->flow_list, next, tmp) {
+               lret = bond_flow_destroy(dev, flow, err);
+               if (unlikely(lret != 0))
+                       ret = lret;
+       }
+       if (unlikely(ret != 0))
+               RTE_BOND_LOG(ERR, "Failed to flush flow in all slaves");
+       return ret;
+}
+
+static int
+bond_flow_query_count(struct rte_eth_dev *dev, struct rte_flow *flow,
+                     struct rte_flow_query_count *count,
+                     struct rte_flow_error *err)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct rte_flow_query_count slave_count;
+       int i;
+       int ret;
+
+       count->bytes = 0;
+       count->hits = 0;
+       rte_memcpy(&slave_count, count, sizeof(slave_count));
+       for (i = 0; i < internals->slave_count; i++) {
+               ret = rte_flow_query(internals->slaves[i].port_id,
+                                    flow->flows[i], RTE_FLOW_ACTION_TYPE_COUNT,
+                                    &slave_count, err);
+               if (unlikely(ret != 0)) {
+                       RTE_BOND_LOG(ERR, "Failed to query flow on"
+                                    " slave %d: %d", i, ret);
+                       return ret;
+               }
+               count->bytes += slave_count.bytes;
+               count->hits += slave_count.hits;
+               slave_count.bytes = 0;
+               slave_count.hits = 0;
+       }
+       return 0;
+}
+
+static int
+bond_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow,
+               enum rte_flow_action_type type, void *arg,
+               struct rte_flow_error *err)
+{
+       switch (type) {
+       case RTE_FLOW_ACTION_TYPE_COUNT:
+               return bond_flow_query_count(dev, flow, arg, err);
+       default:
+               return rte_flow_error_set(err, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, arg,
+                                         rte_strerror(ENOTSUP));
+       }
+}
+
+static int
+bond_flow_isolate(struct rte_eth_dev *dev, int set,
+                 struct rte_flow_error *err)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       int i;
+       int ret;
+
+       for (i = 0; i < internals->slave_count; i++) {
+               ret = rte_flow_isolate(internals->slaves[i].port_id, set, err);
+               if (unlikely(ret != 0)) {
+                       RTE_BOND_LOG(ERR, "Operation rte_flow_isolate failed"
+                                    " for slave %d with error %d", i, ret);
+                       internals->flow_isolated_valid = 0;
+                       return ret;
+               }
+       }
+       internals->flow_isolated = set;
+       internals->flow_isolated_valid = 1;
+       return 0;
+}
+
+const struct rte_flow_ops bond_flow_ops = {
+       .validate = bond_flow_validate,
+       .create = bond_flow_create,
+       .destroy = bond_flow_destroy,
+       .flush = bond_flow_flush,
+       .query = bond_flow_query,
+       .isolate = bond_flow_isolate,
+};
index 09696ea..7a1ef60 100644 (file)
@@ -1964,10 +1964,19 @@ slave_remove(struct bond_dev_private *internals,
                                slave_eth_dev->data->port_id)
                        break;
 
-       if (i < (internals->slave_count - 1))
+       if (i < (internals->slave_count - 1)) {
+               struct rte_flow *flow;
+
                memmove(&internals->slaves[i], &internals->slaves[i + 1],
                                sizeof(internals->slaves[0]) *
                                (internals->slave_count - i - 1));
+               TAILQ_FOREACH(flow, &internals->flow_list, next) {
+                       memmove(&flow->flows[i], &flow->flows[i + 1],
+                               sizeof(flow->flows[0]) *
+                               (internals->slave_count - i - 1));
+                       flow->flows[internals->slave_count - 1] = NULL;
+               }
+       }
 
        internals->slave_count--;
 
@@ -2191,6 +2200,7 @@ bond_ethdev_close(struct rte_eth_dev *dev)
        struct bond_dev_private *internals = dev->data->dev_private;
        uint8_t bond_port_id = internals->port_id;
        int skipped = 0;
+       struct rte_flow_error ferror;
 
        RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
        while (internals->slave_count != skipped) {
@@ -2205,6 +2215,7 @@ bond_ethdev_close(struct rte_eth_dev *dev)
                        skipped++;
                }
        }
+       bond_flow_ops.flush(dev, &ferror);
        bond_ethdev_free_queues(dev);
        rte_bitmap_reset(internals->vlan_filter_bmp);
 }
@@ -2883,6 +2894,17 @@ bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
        return 0;
 }
 
+static int
+bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
+                enum rte_filter_type type, enum rte_filter_op op, void *arg)
+{
+       if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
+               *(const void **)arg = &bond_flow_ops;
+               return 0;
+       }
+       return -ENOTSUP;
+}
+
 const struct eth_dev_ops default_dev_ops = {
        .dev_start            = bond_ethdev_start,
        .dev_stop             = bond_ethdev_stop,
@@ -2904,7 +2926,8 @@ const struct eth_dev_ops default_dev_ops = {
        .rss_hash_update      = bond_ethdev_rss_hash_update,
        .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
        .mtu_set              = bond_ethdev_mtu_set,
-       .mac_addr_set         = bond_ethdev_mac_address_set
+       .mac_addr_set         = bond_ethdev_mac_address_set,
+       .filter_ctrl          = bond_filter_ctrl
 };
 
 static int
@@ -2972,6 +2995,9 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
        memset(internals->slaves, 0, sizeof(internals->slaves));
 
+       TAILQ_INIT(&internals->flow_list);
+       internals->flow_isolated_valid = 0;
+
        /* Set mode 4 default configuration */
        bond_mode_8023ad_setup(eth_dev, NULL);
        if (bond_ethdev_mode_set(eth_dev, mode)) {
index 94eca88..d64d216 100644 (file)
@@ -5,9 +5,12 @@
 #ifndef _RTE_ETH_BOND_PRIVATE_H_
 #define _RTE_ETH_BOND_PRIVATE_H_
 
+#include <sys/queue.h>
+
 #include <rte_ethdev_driver.h>
 #include <rte_spinlock.h>
 #include <rte_bitmap.h>
+#include <rte_flow_driver.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_8023ad_private.h"
@@ -37,6 +40,8 @@ extern const char *pmd_bond_init_valid_arguments[];
 
 extern struct rte_vdev_driver pmd_bond_drv;
 
+extern const struct rte_flow_ops bond_flow_ops;
+
 /** Port Queue Mapping Structure */
 struct bond_rx_queue {
        uint16_t queue_id;
@@ -80,6 +85,14 @@ struct bond_slave_details {
        uint16_t reta_size;
 };
 
+struct rte_flow {
+       TAILQ_ENTRY(rte_flow) next;
+       /* Slaves flows */
+       struct rte_flow *flows[RTE_MAX_ETHPORTS];
+       /* Flow description for synchronization */
+       struct rte_flow_desc *fd;
+};
+
 typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves);
 
@@ -133,6 +146,13 @@ struct bond_dev_private {
        uint64_t rx_queue_offload_capa; /** per queue Rx offload capability */
        uint64_t tx_queue_offload_capa; /** per queue Tx offload capability */
 
+       /**< List of the configured flows */
+       TAILQ_HEAD(sub_flows, rte_flow) flow_list;
+
+       /**< Flow isolation state */
+       int flow_isolated;
+       int flow_isolated_valid;
+
        /** Bit mask of RSS offloads, the bit offset also means flow type */
        uint64_t flow_type_rss_offloads;