bonding: support RSS dynamic configuration
authorTomasz Kulasek <tomaszx.kulasek@intel.com>
Fri, 30 Oct 2015 14:25:48 +0000 (15:25 +0100)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Sun, 1 Nov 2015 17:10:48 +0000 (18:10 +0100)
Bonding device implements independent management of RSS settings. It
stores its own copies of settings i.e. RETA, RSS hash function and RSS
key. It’s required to ensure consistency.

1) RSS hash function set for bonding device is maximal set of RSS hash
functions supported by all bonded devices. That means, to have RSS support
for bonding, all slaves should be RSS-capable.

2) RSS key is propagated over the slaves "as is".

3) RETA for bonding is an internal table managed by bonding API, and is
used as a pattern to set up slaves. Its size is GCD of all RETA sizes, so
it can be easily used as a pattern providing expected behavior, even if
slaves RETA sizes are different.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
Acked-by: Declan Doherty <declan.doherty@intel.com>
doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
doc/guides/rel_notes/release_2_2.rst
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/bonding/rte_eth_bond_private.h

index 03baf90..46f0296 100644 (file)
@@ -1,5 +1,5 @@
 ..  BSD LICENSE
-    Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+    Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
     All rights reserved.
 
     Redistribution and use in source and binary forms, with or without
@@ -173,7 +173,28 @@ After a slave device is added to a bonded device slave is stopped using
 ``rte_eth_dev_stop`` and then reconfigured using ``rte_eth_dev_configure``
 the RX and TX queues are also reconfigured using ``rte_eth_tx_queue_setup`` /
 ``rte_eth_rx_queue_setup`` with the parameters use to configure the bonding
-device.
+device. If RSS is enabled for bonding device, this mode is also enabled on new
+slave and configured as well.
+
+Setting up multi-queue mode for bonding device to RSS, makes it fully
+RSS-capable, so all slaves are synchronized with its configuration. This mode is
+intended to provide RSS configuration on slaves transparent for client
+application implementation.
+
+Bonding device stores its own version of RSS settings i.e. RETA, RSS hash
+function and RSS key, used to set up its slaves. This allows the RSS
+configuration of the bonding device to be defined as the desired configuration
+of the whole bond (as one unit), without referring to any individual slave. It
+is required to ensure consistency and makes the configuration less error-prone.
+
+RSS hash function set for bonding device is the maximal set of RSS hash
+functions supported by all bonded slaves. RETA size is the GCD of all slaves'
+RETA sizes, so it can be easily used as a pattern providing expected behavior,
+even if the slaves' RETA sizes are different. If RSS key is not set for the
+bonded device, it's not changed on the slaves and default key for device is used.
+
+All settings are managed through the bonding port API and always are propagated
+in one direction (from bonding to slaves).
 
 Link Status Change Interrupts / Polling
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -207,6 +228,15 @@ these parameters.
 A bonding device must have a minimum of one slave before the bonding device
 itself can be started.
 
+To use the bonding device's dynamic RSS configuration feature effectively, all
+slaves must also be RSS-capable and must support at least one hash function
+common to all of them. Changing the RSS key is only possible when all slave
+devices support the same key size.
+
+To prevent inconsistency on how slaves process packets, once a device is added
+to a bonding device, RSS configuration should be managed through the bonding
+device API, and not directly on the slave.
+
 Like all other PMD, all functions exported by a PMD are lock-free functions
 that are assumed not to be invoked in parallel on different logical cores to
 work on the same target object.
index 0b8a263..03d93d6 100644 (file)
@@ -4,6 +4,8 @@ DPDK Release 2.2
 New Features
 ------------
 
+* **Added RSS dynamic configuration to bonding.**
+
 * **Added e1000 Rx interrupt support.**
 
 * **Added igb TSO support for both PF and VF.**
index 0681d1a..92073df 100644 (file)
@@ -273,6 +273,9 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        internals->rx_offload_capa = 0;
        internals->tx_offload_capa = 0;
 
+       /* Initially allow to choose any offload type */
+       internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+
        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
        memset(internals->slaves, 0, sizeof(internals->slaves));
 
@@ -369,6 +372,11 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 
        rte_eth_dev_info_get(slave_port_id, &dev_info);
 
+       /* We need to store slaves reta_size to be able to synchronize RETA for all
+        * slave devices even if its sizes are different.
+        */
+       internals->slaves[internals->slave_count].reta_size = dev_info.reta_size;
+
        if (internals->slave_count < 1) {
                /* if MAC is not user defined then use MAC of first slave add to
                 * bonded device */
@@ -382,9 +390,16 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                /* Make primary slave */
                internals->primary_port = slave_port_id;
 
+               /* Inherit queues settings from first slave */
+               internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
+               internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
+
+               internals->reta_size = dev_info.reta_size;
+
                /* Take the first dev's offload capabilities */
                internals->rx_offload_capa = dev_info.rx_offload_capa;
                internals->tx_offload_capa = dev_info.tx_offload_capa;
+               internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
 
        } else {
                /* Check slave link properties are supported if props are set,
@@ -403,8 +418,19 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                }
                internals->rx_offload_capa &= dev_info.rx_offload_capa;
                internals->tx_offload_capa &= dev_info.tx_offload_capa;
+               internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
+
+               /* RETA size is GCD of all slaves RETA sizes, so, if all sizes will be
+                * the power of 2, the lower one is GCD
+                */
+               if (internals->reta_size > dev_info.reta_size)
+                       internals->reta_size = dev_info.reta_size;
+
        }
 
+       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
+                       internals->flow_type_rss_offloads;
+
        internals->slave_count++;
 
        /* Update all slave devices MACs*/
@@ -531,6 +557,8 @@ __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
        if (internals->slave_count == 0) {
                internals->rx_offload_capa = 0;
                internals->tx_offload_capa = 0;
+               internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+               internals->reta_size = 0;
        }
        return 0;
 }
index 5cc6372..2880f5c 100644 (file)
@@ -1310,6 +1310,23 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
        if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
 
+       /* If RSS is enabled for bonding, try to enable it for slaves  */
+       if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+               if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
+                               != 0) {
+                       slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
+                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+                       slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
+                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+               } else {
+                       slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
+               }
+
+               slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
+                               bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+               slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
+       }
+
        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->nb_rx_queues,
@@ -1361,6 +1378,30 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
                return -1;
        }
 
+       /* If RSS is enabled for bonding, synchronize RETA */
+       if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+               int i;
+               struct bond_dev_private *internals;
+
+               internals = bonded_eth_dev->data->dev_private;
+
+               for (i = 0; i < internals->slave_count; i++) {
+                       if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
+                               errval = rte_eth_dev_rss_reta_update(
+                                               slave_eth_dev->data->port_id,
+                                               &internals->reta_conf[0],
+                                               internals->slaves[i].reta_size);
+                               if (errval != 0) {
+                                       RTE_LOG(WARNING, PMD,
+                                                       "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
+                                                       " RSS Configuration for bonding may be inconsistent.\n",
+                                                       slave_eth_dev->data->port_id, errval);
+                               }
+                               break;
+                       }
+               }
+       }
+
        /* If lsc interrupt is set, check initial slave's link status */
        if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
@@ -1596,6 +1637,9 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
        dev_info->rx_offload_capa = internals->rx_offload_capa;
        dev_info->tx_offload_capa = internals->tx_offload_capa;
+       dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
+
+       dev_info->reta_size = internals->reta_size;
 }
 
 static int
@@ -1977,21 +2021,132 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
        }
 }
 
+/*
+ * Update the bonding device's redirection table (RETA) and push it to all
+ * slaves.
+ *
+ * dev       - bonding ethdev whose private data holds the master RETA copy.
+ * reta_conf - caller-supplied RETA groups; only entries whose mask bit is set
+ *             are copied into the stored table.
+ * reta_size - must equal internals->reta_size.
+ *
+ * Returns 0 on success, -EINVAL if reta_size does not match the bonding RETA
+ * size or does not map onto the internal table, or the first negative value
+ * returned by rte_eth_dev_rss_reta_update() for a slave (slaves after the
+ * failing one are not updated).
+ */
+static int
+bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
+               struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+       unsigned i, j;
+       int result = 0;
+       int slave_reta_size;
+       unsigned reta_count;
+       unsigned copy_count;
+       struct bond_dev_private *internals = dev->data->dev_private;
+
+       if (reta_size != internals->reta_size)
+               return -EINVAL;
+
+       /* Number of RETA groups covered by the bonding table */
+       reta_count = reta_size / RTE_RETA_GROUP_SIZE;
+
+       /* A zero group count would keep the fill loop below from ever advancing
+        * (i += 0), and a count larger than the internal table would make the
+        * copy loop write past its end.
+        */
+       if (reta_count == 0 || reta_count > RTE_DIM(internals->reta_conf))
+               return -EINVAL;
+
+       /* Copy RETA table, honouring the caller's per-entry mask */
+       for (i = 0; i < reta_count; i++) {
+               internals->reta_conf[i].mask = reta_conf[i].mask;
+               for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+                       if ((reta_conf[i].mask >> j) & 0x01)
+                               internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
+       }
+
+       /* Fill rest of array by repeating the pattern, so that slaves with a
+        * larger RETA can be programmed from the same buffer. The last copy is
+        * clamped in case the group count does not divide the table size.
+        */
+       for (; i < RTE_DIM(internals->reta_conf); i += reta_count) {
+               copy_count = RTE_DIM(internals->reta_conf) - i;
+               if (copy_count > reta_count)
+                       copy_count = reta_count;
+               memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
+                               sizeof(internals->reta_conf[0]) * copy_count);
+       }
+
+       /* Propagate RETA over slaves, each with its own RETA size */
+       for (i = 0; i < internals->slave_count; i++) {
+               slave_reta_size = internals->slaves[i].reta_size;
+               result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
+                               &internals->reta_conf[0], slave_reta_size);
+               if (result < 0)
+                       return result;
+       }
+
+       return 0;
+}
+
+/*
+ * Read back the bonding device's stored RETA.
+ *
+ * Only entries whose bit is set in the caller's reta_conf[].mask are written.
+ * Returns -EINVAL when reta_size differs from the bonding RETA size, else 0.
+ */
+static int
+bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
+               struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       int group, entry;
+       int group_count;
+
+       if (internals->reta_size != reta_size)
+               return -EINVAL;
+
+       group_count = reta_size / RTE_RETA_GROUP_SIZE;
+
+       for (group = 0; group < group_count; group++) {
+               for (entry = 0; entry < RTE_RETA_GROUP_SIZE; entry++) {
+                       /* Skip entries the caller did not select */
+                       if (((reta_conf[group].mask >> entry) & 0x01) == 0)
+                               continue;
+
+                       reta_conf[group].reta[entry] =
+                                       internals->reta_conf[group].reta[entry];
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Update the RSS hash configuration of the bonding device and its slaves.
+ *
+ * The requested hash functions are masked with the set supported by all
+ * slaves (internals->flow_type_rss_offloads); a non-empty result is stored in
+ * the bonding device configuration. If a key is supplied and fits in the
+ * internal key buffer it is stored too; a key length of 0 is treated as 40
+ * bytes. The masked configuration is then applied to each slave in turn.
+ *
+ * Returns 0 on success, or the first negative value returned by
+ * rte_eth_dev_rss_hash_update() for a slave; slaves after the failing one are
+ * not updated.
+ */
+static int
+bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
+               struct rte_eth_rss_conf *rss_conf)
+{
+       int i, result = 0;
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct rte_eth_rss_conf bond_rss_conf;
+
+       /* Work on a copy so the caller's structure is left untouched */
+       memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
+
+       bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
+
+       if (bond_rss_conf.rss_hf != 0)
+               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
+
+       /* A key that exactly fills the internal buffer is valid, hence "<=".
+        * NOTE(review): a longer key is not stored here but is still forwarded
+        * to the slaves below - confirm whether it should be rejected instead.
+        */
+       if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <=
+                       sizeof(internals->rss_key)) {
+               if (bond_rss_conf.rss_key_len == 0)
+                       bond_rss_conf.rss_key_len = 40;
+               internals->rss_key_len = bond_rss_conf.rss_key_len;
+               memcpy(internals->rss_key, bond_rss_conf.rss_key,
+                               internals->rss_key_len);
+       }
+
+       for (i = 0; i < internals->slave_count; i++) {
+               result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
+                               &bond_rss_conf);
+               if (result < 0)
+                       return result;
+       }
+
+       return 0;
+}
+
+/*
+ * Report the bonding device's stored RSS hash configuration.
+ *
+ * Writes the stored hash-function mask and key length into rss_conf. The key
+ * bytes are copied only when the caller supplied a non-NULL rss_key buffer.
+ * Always returns 0.
+ */
+static int
+bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
+               struct rte_eth_rss_conf *rss_conf)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+
+       rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+       rss_conf->rss_key_len = internals->rss_key_len;
+
+       /* No destination buffer: only the mask and length are reported */
+       if (rss_conf->rss_key == NULL)
+               return 0;
+
+       memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
+
+       return 0;
+}
+
 struct eth_dev_ops default_dev_ops = {
-               .dev_start = bond_ethdev_start,
-               .dev_stop = bond_ethdev_stop,
-               .dev_close = bond_ethdev_close,
-               .dev_configure = bond_ethdev_configure,
-               .dev_infos_get = bond_ethdev_info,
-               .rx_queue_setup = bond_ethdev_rx_queue_setup,
-               .tx_queue_setup = bond_ethdev_tx_queue_setup,
-               .rx_queue_release = bond_ethdev_rx_queue_release,
-               .tx_queue_release = bond_ethdev_tx_queue_release,
-               .link_update = bond_ethdev_link_update,
-               .stats_get = bond_ethdev_stats_get,
-               .stats_reset = bond_ethdev_stats_reset,
-               .promiscuous_enable = bond_ethdev_promiscuous_enable,
-               .promiscuous_disable = bond_ethdev_promiscuous_disable
+               .dev_start            = bond_ethdev_start,
+               .dev_stop             = bond_ethdev_stop,
+               .dev_close            = bond_ethdev_close,
+               .dev_configure        = bond_ethdev_configure,
+               .dev_infos_get        = bond_ethdev_info,
+               .rx_queue_setup       = bond_ethdev_rx_queue_setup,
+               .tx_queue_setup       = bond_ethdev_tx_queue_setup,
+               .rx_queue_release     = bond_ethdev_rx_queue_release,
+               .tx_queue_release     = bond_ethdev_tx_queue_release,
+               .link_update          = bond_ethdev_link_update,
+               .stats_get            = bond_ethdev_stats_get,
+               .stats_reset          = bond_ethdev_stats_reset,
+               .promiscuous_enable   = bond_ethdev_promiscuous_enable,
+               .promiscuous_disable  = bond_ethdev_promiscuous_disable,
+               .reta_update          = bond_ethdev_rss_reta_update,
+               .reta_query           = bond_ethdev_rss_reta_query,
+               .rss_hash_update      = bond_ethdev_rss_hash_update,
+               .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
 };
 
 static int
@@ -2090,6 +2245,28 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
        int arg_count;
        uint8_t port_id = dev - rte_eth_devices;
 
+       static const uint8_t default_rss_key[40] = {
+               0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
+               0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+               0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
+               0xBE, 0xAC, 0x01, 0xFA
+       };
+
+       unsigned i, j;
+
+       /* If RSS is enabled, fill table and key with default values */
+       if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
+               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
+               memcpy(internals->rss_key, default_rss_key, 40);
+
+               for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
+                       internals->reta_conf[i].mask = ~0LL;
+                       for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+                               internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+               }
+       }
+
        /*
         * if no kvlist, it means that this bonded device has been created
         * through the bonding api.
index 038bca6..e7af809 100644 (file)
@@ -103,6 +103,8 @@ struct bond_slave_details {
        uint8_t last_link_status;
        /**< Port Id of slave eth_dev */
        struct ether_addr persisted_mac_addr;
+
+       uint16_t reta_size;
 };
 
 
@@ -155,6 +157,16 @@ struct bond_dev_private {
        uint32_t rx_offload_capa;            /** Rx offload capability */
        uint32_t tx_offload_capa;            /** Tx offload capability */
 
+       /** Bit mask of RSS offloads, the bit offset also means flow type */
+       uint64_t flow_type_rss_offloads;
+
+       uint16_t reta_size;
+       struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 /
+                       RTE_RETA_GROUP_SIZE];
+
+       uint8_t rss_key[52];                            /**< 52-byte hash key buffer. */
+       uint8_t rss_key_len;                            /**< hash key length in bytes. */
+
        struct rte_kvargs *kvlist;
        uint8_t slave_update_idx;
 };