]> git.droids-corp.org - dpdk.git/commitdiff
bonding: support RSS dynamic configuration
authorTomasz Kulasek <tomaszx.kulasek@intel.com>
Fri, 30 Oct 2015 14:25:48 +0000 (15:25 +0100)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Sun, 1 Nov 2015 17:10:48 +0000 (18:10 +0100)
Bonding device implements independent management of RSS settings. It
stores its own copies of settings i.e. RETA, RSS hash function and RSS
key. It’s required to ensure consistency.

1) The RSS hash function set for a bonding device is the maximal set of RSS hash
functions supported by all bonded devices. That means, to have RSS support
for bonding, all slaves should be RSS-capable.

2) RSS key is propagated over the slaves "as is".

3) RETA for bonding is an internal table managed by bonding API, and is
used as a pattern to set up slaves. Its size is GCD of all RETA sizes, so
it can be easily used as a pattern providing expected behavior, even if
slaves RETA sizes are different.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
Acked-by: Declan Doherty <declan.doherty@intel.com>
doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
doc/guides/rel_notes/release_2_2.rst
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/bonding/rte_eth_bond_private.h

index 03baf90d38b5c1897bdc34de9837dddc6694dacb..46f0296e2d2904c10d149dc4889a7891564adefd 100644 (file)
@@ -1,5 +1,5 @@
 ..  BSD LICENSE
-    Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+    Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
     All rights reserved.
 
     Redistribution and use in source and binary forms, with or without
@@ -173,7 +173,28 @@ After a slave device is added to a bonded device slave is stopped using
 ``rte_eth_dev_stop`` and then reconfigured using ``rte_eth_dev_configure``
 the RX and TX queues are also reconfigured using ``rte_eth_tx_queue_setup`` /
 ``rte_eth_rx_queue_setup`` with the parameters use to configure the bonding
-device.
+device. If RSS is enabled for bonding device, this mode is also enabled on new
+slave and configured as well.
+
+Setting the multi-queue mode of a bonding device to RSS makes it fully
+RSS-capable, so all slaves are synchronized with its configuration. This mode is
+intended to make the RSS configuration of the slaves transparent to the client
+application implementation.
+
+The bonding device stores its own version of the RSS settings, i.e. RETA, RSS
+hash function and RSS key, used to set up its slaves. This allows the RSS
+configuration of a bonding device to be defined as the desired configuration of
+the whole bond (as one unit), without referring to any individual slave. This is
+required to ensure consistency and makes it less error-prone.
+
+The RSS hash function set for a bonding device is the maximal set of RSS hash
+functions supported by all bonded slaves. The RETA size is the GCD of all
+slaves' RETA sizes, so it can easily be used as a pattern providing the expected
+behavior, even if the slaves' RETA sizes differ. If the RSS key is not set for
+the bonded device, it is not changed on the slaves and the default key for the
+device is used.
+
+All settings are managed through the bonding port API and always are propagated
+in one direction (from bonding to slaves).
 
 Link Status Change Interrupts / Polling
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -207,6 +228,15 @@ these parameters.
 A bonding device must have a minimum of one slave before the bonding device
 itself can be started.
 
+To use the dynamic RSS configuration feature of a bonding device effectively,
+all slaves must also be RSS-capable and support at least one common hash
+function. Changing the RSS key is only possible when all slave devices support
+the same key size.
+
+To prevent inconsistency on how slaves process packets, once a device is added
+to a bonding device, RSS configuration should be managed through the bonding
+device API, and not directly on the slave.
+
 Like all other PMD, all functions exported by a PMD are lock-free functions
 that are assumed not to be invoked in parallel on different logical cores to
 work on the same target object.
index 0b8a2639540f0fc3c1cf1cb897fc90ea5873cbfc..03d93d61f9268039d80c6079b2d8abef9b067a37 100644 (file)
@@ -4,6 +4,8 @@ DPDK Release 2.2
 New Features
 ------------
 
+* **Added RSS dynamic configuration to bonding.**
+
 * **Added e1000 Rx interrupt support.**
 
 * **Added igb TSO support for both PF and VF.**
index 0681d1adc4e4f58e0c353c7bd22ecc24fc33029c..92073dff51645d69985db90aabf54f5e50bb8cd8 100644 (file)
@@ -273,6 +273,9 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        internals->rx_offload_capa = 0;
        internals->tx_offload_capa = 0;
 
+       /* Initially allow to choose any offload type */
+       internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+
        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
        memset(internals->slaves, 0, sizeof(internals->slaves));
 
@@ -369,6 +372,11 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 
        rte_eth_dev_info_get(slave_port_id, &dev_info);
 
+       /* Store each slave's reta_size so that RETA can be synchronized across
+        * all slave devices even if their sizes differ.
+        */
+       internals->slaves[internals->slave_count].reta_size = dev_info.reta_size;
+
        if (internals->slave_count < 1) {
                /* if MAC is not user defined then use MAC of first slave add to
                 * bonded device */
@@ -382,9 +390,16 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                /* Make primary slave */
                internals->primary_port = slave_port_id;
 
+               /* Inherit queues settings from first slave */
+               internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
+               internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
+
+               internals->reta_size = dev_info.reta_size;
+
                /* Take the first dev's offload capabilities */
                internals->rx_offload_capa = dev_info.rx_offload_capa;
                internals->tx_offload_capa = dev_info.tx_offload_capa;
+               internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
 
        } else {
                /* Check slave link properties are supported if props are set,
@@ -403,8 +418,19 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                }
                internals->rx_offload_capa &= dev_info.rx_offload_capa;
                internals->tx_offload_capa &= dev_info.tx_offload_capa;
+               internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
+
+               /* RETA size is the GCD of all slaves' RETA sizes; as long as all
+                * sizes are powers of 2, the smallest one is the GCD.
+                */
+               if (internals->reta_size > dev_info.reta_size)
+                       internals->reta_size = dev_info.reta_size;
+
        }
 
+       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
+                       internals->flow_type_rss_offloads;
+
        internals->slave_count++;
 
        /* Update all slave devices MACs*/
@@ -531,6 +557,8 @@ __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
        if (internals->slave_count == 0) {
                internals->rx_offload_capa = 0;
                internals->tx_offload_capa = 0;
+               internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+               internals->reta_size = 0;
        }
        return 0;
 }
index 5cc637239e1d0f15a99371b2c26e4a7b8ef4cea5..2880f5c1826facf943eb53110dc4ab14b6df894e 100644 (file)
@@ -1310,6 +1310,23 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
        if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
 
+       /* If RSS is enabled for bonding, try to enable it for slaves  */
+       if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+               if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
+                               != 0) {
+                       slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
+                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+                       slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
+                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+               } else {
+                       slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
+               }
+
+               slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
+                               bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+               slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
+       }
+
        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->nb_rx_queues,
@@ -1361,6 +1378,30 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
                return -1;
        }
 
+       /* If RSS is enabled for bonding, synchronize RETA */
+       if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+               int i;
+               struct bond_dev_private *internals;
+
+               internals = bonded_eth_dev->data->dev_private;
+
+               for (i = 0; i < internals->slave_count; i++) {
+                       if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
+                               errval = rte_eth_dev_rss_reta_update(
+                                               slave_eth_dev->data->port_id,
+                                               &internals->reta_conf[0],
+                                               internals->slaves[i].reta_size);
+                               if (errval != 0) {
+                                       RTE_LOG(WARNING, PMD,
+                                                       "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
+                                                       " RSS Configuration for bonding may be inconsistent.\n",
+                                                       slave_eth_dev->data->port_id, errval);
+                               }
+                               break;
+                       }
+               }
+       }
+
        /* If lsc interrupt is set, check initial slave's link status */
        if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
@@ -1596,6 +1637,9 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
        dev_info->rx_offload_capa = internals->rx_offload_capa;
        dev_info->tx_offload_capa = internals->tx_offload_capa;
+       dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
+
+       dev_info->reta_size = internals->reta_size;
 }
 
 static int
@@ -1977,21 +2021,132 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
        }
 }
 
+/**
+ * Update the RETA (RSS redirection table) kept by the bonding device and
+ * propagate it to all slaves.
+ *
+ * Entries selected by each group's mask are copied into the bonding device's
+ * private table; the configured groups are then repeated over the remainder of
+ * the table so the same pattern can be handed to each slave together with the
+ * RETA size stored for that slave.
+ *
+ * @param dev
+ *   Bonding ethdev.
+ * @param reta_conf
+ *   Array of reta_size / RTE_RETA_GROUP_SIZE groups holding masks and entries.
+ * @param reta_size
+ *   Number of RETA entries; must equal the bonding device's RETA size.
+ * @return
+ *   0 on success; -EINVAL if reta_size does not match the bonding device or
+ *   does not describe between one group and the capacity of the private
+ *   table; otherwise the negative value returned by the first failing slave
+ *   update.
+ */
+static int
+bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
+               struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+       unsigned i, j;
+       int result = 0;
+       int slave_reta_size;
+       unsigned reta_count;
+       unsigned copy_count;
+       struct bond_dev_private *internals = dev->data->dev_private;
+
+       if (reta_size != internals->reta_size)
+               return -EINVAL;
+
+       reta_count = reta_size / RTE_RETA_GROUP_SIZE;
+
+       /* A zero group count would make the fill loop below advance by zero and
+        * never terminate; a count above the table capacity would overflow the
+        * private table in the copy loop.
+        */
+       if (reta_count == 0 || reta_count > RTE_DIM(internals->reta_conf))
+               return -EINVAL;
+
+       /* Copy RETA table */
+       for (i = 0; i < reta_count; i++) {
+               internals->reta_conf[i].mask = reta_conf[i].mask;
+               for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+                       if ((reta_conf[i].mask >> j) & 0x01)
+                               internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
+       }
+
+       /* Fill rest of array by repeating the configured groups; the last copy
+        * is clamped so it can never write past the end of the table.
+        */
+       for (; i < RTE_DIM(internals->reta_conf); i += copy_count) {
+               copy_count = RTE_DIM(internals->reta_conf) - i;
+               if (copy_count > reta_count)
+                       copy_count = reta_count;
+               memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
+                               sizeof(internals->reta_conf[0]) * copy_count);
+       }
+
+       /* Propagate RETA over slaves, each with its own stored RETA size */
+       for (i = 0; i < internals->slave_count; i++) {
+               slave_reta_size = internals->slaves[i].reta_size;
+               result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
+                               &internals->reta_conf[0], slave_reta_size);
+               if (result < 0)
+                       return result;
+       }
+
+       return 0;
+}
+
+/**
+ * Read back the RETA stored by the bonding device.
+ *
+ * Only entries whose bit is set in the caller's group mask are written into
+ * reta_conf; the values come from the bonding device's private table.
+ *
+ * @return
+ *   0 on success, -EINVAL if reta_size differs from the bonding RETA size.
+ */
+static int
+bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
+               struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+       struct bond_dev_private *priv = dev->data->dev_private;
+       int group, entry;
+       int nb_groups;
+
+       if (reta_size != priv->reta_size)
+               return -EINVAL;
+
+       nb_groups = reta_size / RTE_RETA_GROUP_SIZE;
+
+       /* Walk every group and copy out the entries the caller asked for */
+       for (group = 0; group < nb_groups; group++) {
+               for (entry = 0; entry < RTE_RETA_GROUP_SIZE; entry++) {
+                       if (!((reta_conf[group].mask >> entry) & 0x01))
+                               continue;
+
+                       reta_conf[group].reta[entry] =
+                                       priv->reta_conf[group].reta[entry];
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * Update the RSS hash functions and key of the bonding device and propagate
+ * them to all slaves.
+ *
+ * The requested hash functions are masked with the set supported by the
+ * bonding device; a non-empty result is stored as the bonding device's
+ * rss_hf. If a key is supplied it is stored in the bonding device's private
+ * key buffer; a key length of 0 is treated as a 40-byte key.
+ *
+ * @param dev
+ *   Bonding ethdev.
+ * @param rss_conf
+ *   Requested RSS configuration; not modified (a local copy is used).
+ * @return
+ *   0 on success; -EINVAL if the supplied key does not fit in the private key
+ *   buffer; otherwise the negative value returned by the first failing slave
+ *   update.
+ */
+static int
+bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
+               struct rte_eth_rss_conf *rss_conf)
+{
+       int i, result = 0;
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct rte_eth_rss_conf bond_rss_conf;
+
+       memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
+
+       /* Reject a key that cannot be stored before any state is changed:
+        * pushing it to the slaves without storing it would leave the key
+        * kept by the bonding device out of sync with its slaves.
+        */
+       if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len >
+                       sizeof(internals->rss_key))
+               return -EINVAL;
+
+       bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
+
+       if (bond_rss_conf.rss_hf != 0)
+               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
+
+       /* A key that exactly fills the buffer is valid and is stored too */
+       if (bond_rss_conf.rss_key) {
+               if (bond_rss_conf.rss_key_len == 0)
+                       bond_rss_conf.rss_key_len = 40;
+               internals->rss_key_len = bond_rss_conf.rss_key_len;
+               memcpy(internals->rss_key, bond_rss_conf.rss_key,
+                               internals->rss_key_len);
+       }
+
+       /* Propagate the masked configuration over slaves */
+       for (i = 0; i < internals->slave_count; i++) {
+               result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
+                               &bond_rss_conf);
+               if (result < 0)
+                       return result;
+       }
+
+       return 0;
+}
+
+/**
+ * Report the RSS hash configuration stored by the bonding device.
+ *
+ * rss_hf is taken from the bonding device's own configuration and the key
+ * length from its private data; the key bytes are copied only when the caller
+ * provides a key buffer.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
+               struct rte_eth_rss_conf *rss_conf)
+{
+       struct bond_dev_private *priv = dev->data->dev_private;
+
+       rss_conf->rss_key_len = priv->rss_key_len;
+       rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+
+       /* No key buffer supplied: only hash functions and length are reported */
+       if (!rss_conf->rss_key)
+               return 0;
+
+       memcpy(rss_conf->rss_key, priv->rss_key, priv->rss_key_len);
+
+       return 0;
+}
+
 struct eth_dev_ops default_dev_ops = {
-               .dev_start = bond_ethdev_start,
-               .dev_stop = bond_ethdev_stop,
-               .dev_close = bond_ethdev_close,
-               .dev_configure = bond_ethdev_configure,
-               .dev_infos_get = bond_ethdev_info,
-               .rx_queue_setup = bond_ethdev_rx_queue_setup,
-               .tx_queue_setup = bond_ethdev_tx_queue_setup,
-               .rx_queue_release = bond_ethdev_rx_queue_release,
-               .tx_queue_release = bond_ethdev_tx_queue_release,
-               .link_update = bond_ethdev_link_update,
-               .stats_get = bond_ethdev_stats_get,
-               .stats_reset = bond_ethdev_stats_reset,
-               .promiscuous_enable = bond_ethdev_promiscuous_enable,
-               .promiscuous_disable = bond_ethdev_promiscuous_disable
+               .dev_start            = bond_ethdev_start,
+               .dev_stop             = bond_ethdev_stop,
+               .dev_close            = bond_ethdev_close,
+               .dev_configure        = bond_ethdev_configure,
+               .dev_infos_get        = bond_ethdev_info,
+               .rx_queue_setup       = bond_ethdev_rx_queue_setup,
+               .tx_queue_setup       = bond_ethdev_tx_queue_setup,
+               .rx_queue_release     = bond_ethdev_rx_queue_release,
+               .tx_queue_release     = bond_ethdev_tx_queue_release,
+               .link_update          = bond_ethdev_link_update,
+               .stats_get            = bond_ethdev_stats_get,
+               .stats_reset          = bond_ethdev_stats_reset,
+               .promiscuous_enable   = bond_ethdev_promiscuous_enable,
+               .promiscuous_disable  = bond_ethdev_promiscuous_disable,
+               .reta_update          = bond_ethdev_rss_reta_update,   /* store RETA, push to slaves */
+               .reta_query           = bond_ethdev_rss_reta_query,    /* read RETA stored by bonding */
+               .rss_hash_update      = bond_ethdev_rss_hash_update,   /* store hf/key, push to slaves */
+               .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get  /* read hf/key stored by bonding */
 };
 
 static int
@@ -2090,6 +2245,28 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
        int arg_count;
        uint8_t port_id = dev - rte_eth_devices;
 
+       static const uint8_t default_rss_key[40] = {
+               0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
+               0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+               0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
+               0xBE, 0xAC, 0x01, 0xFA
+       };
+
+       unsigned i, j;
+
+       /* If RSS is enabled, fill table and key with default values */
+       if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
+               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
+               memcpy(internals->rss_key, default_rss_key, 40);
+
+               for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
+                       internals->reta_conf[i].mask = ~0LL;
+                       for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+                               internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+               }
+       }
+
        /*
         * if no kvlist, it means that this bonded device has been created
         * through the bonding api.
index 038bca64ff0716699290ec88b7447c26ed0711f9..e7af8099918c797f5ed8ec58d5d1db23bd9b09fe 100644 (file)
@@ -103,6 +103,8 @@ struct bond_slave_details {
        uint8_t last_link_status;
        /**< Port Id of slave eth_dev */
        struct ether_addr persisted_mac_addr;
+
+       uint16_t reta_size;
 };
 
 
@@ -155,6 +157,16 @@ struct bond_dev_private {
        uint32_t rx_offload_capa;            /** Rx offload capability */
        uint32_t tx_offload_capa;            /** Tx offload capability */
 
+       /** Bit mask of RSS offloads, the bit offset also means flow type */
+       uint64_t flow_type_rss_offloads;
+
+       uint16_t reta_size;
+       struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 /
+                       RTE_RETA_GROUP_SIZE];
+
+       uint8_t rss_key[52];                            /**< 52-byte hash key buffer. */
+       uint8_t rss_key_len;                            /**< hash key length in bytes. */
+
        struct rte_kvargs *kvlist;
        uint8_t slave_update_idx;
 };