net/bonding: fix link status
[dpdk.git] drivers/net/bonding/rte_eth_bond_api.c
index b73cb73..57ef2f0 100644
@@ -19,7 +19,10 @@ int
 check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev)
 {
        /* Check valid pointer */
-       if (eth_dev->device->driver->name == NULL)
+       if (eth_dev == NULL ||
+               eth_dev->device == NULL ||
+               eth_dev->device->driver == NULL ||
+               eth_dev->device->driver->name == NULL)
                return -1;
 
        /* return 0 if driver name matches */
@@ -194,7 +197,8 @@ slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id)
        uint16_t first;
 
        bonded_eth_dev = &rte_eth_devices[bonded_port_id];
-       if (bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter == 0)
+       if ((bonded_eth_dev->data->dev_conf.rxmode.offloads &
+                       DEV_RX_OFFLOAD_VLAN_FILTER) == 0)
                return 0;
 
        internals = bonded_eth_dev->data->dev_private;
@@ -211,9 +215,12 @@ slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id)
                for (i = 0, mask = 1;
                     i < RTE_BITMAP_SLAB_BIT_SIZE;
                     i ++, mask <<= 1) {
-                       if (unlikely(slab & mask))
+                       if (unlikely(slab & mask)) {
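+                               /* VLAN ID = slab start position + bit index */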
+                               uint16_t vlan_id = pos + i;
+
                                res = rte_eth_dev_vlan_filter(slave_port_id,
-                                                             (uint16_t)pos, 1);
+                                                             vlan_id, 1);
+                       }
                }
                found = rte_bitmap_scan(internals->vlan_filter_bmp,
                                        &pos, &slab);
@@ -222,6 +229,216 @@ slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id)
        return res;
 }
 
+static int
+slave_rte_flow_prepare(uint16_t slave_id, struct bond_dev_private *internals)
+{
+       struct rte_flow *flow;
+       struct rte_flow_error ferror;
+       uint16_t slave_port_id = internals->slaves[slave_id].port_id;
+
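+       /*
+        * If flow isolation was configured on the bonded device, stop the
+        * new slave and mirror that isolation setting on it first.
+        */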
+       if (internals->flow_isolated_valid != 0) {
+               rte_eth_dev_stop(slave_port_id);
+               if (rte_flow_isolate(slave_port_id, internals->flow_isolated,
+                   &ferror)) {
+                       RTE_BOND_LOG(ERR, "rte_flow_isolate failed for slave"
+                                    " %d: %s", slave_id, ferror.message ?
+                                    ferror.message : "(no stated reason)");
+                       return -1;
+               }
+       }
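+       /* Replay all of the bonded device's flow rules on the new slave. */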
+       TAILQ_FOREACH(flow, &internals->flow_list, next) {
+               flow->flows[slave_id] = rte_flow_create(slave_port_id,
+                                                       flow->rule.attr,
+                                                       flow->rule.pattern,
+                                                       flow->rule.actions,
+                                                       &ferror);
+               if (flow->flows[slave_id] == NULL) {
+                       RTE_BOND_LOG(ERR, "Cannot create flow for slave"
+                                    " %d: %s", slave_id,
+                                    ferror.message ? ferror.message :
+                                    "(no stated reason)");
+                       /* Destroy successful bond flows from the slave */
+                       TAILQ_FOREACH(flow, &internals->flow_list, next) {
+                               if (flow->flows[slave_id] != NULL) {
+                                       rte_flow_destroy(slave_port_id,
+                                                        flow->flows[slave_id],
+                                                        &ferror);
+                                       flow->flows[slave_id] = NULL;
+                               }
+                       }
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_rx_first(struct bond_dev_private *internals,
+                                        const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf;
+
+       internals->reta_size = di->reta_size;
+
+       /* Inherit Rx offload capabilities from the first slave device */
+       internals->rx_offload_capa = di->rx_offload_capa;
+       internals->rx_queue_offload_capa = di->rx_queue_offload_capa;
+       internals->flow_type_rss_offloads = di->flow_type_rss_offloads;
+
+       /* Inherit maximum Rx packet size from the first slave device */
+       internals->candidate_max_rx_pktlen = di->max_rx_pktlen;
+
+       /* Inherit default Rx queue settings from the first slave device */
+       memcpy(rxconf_i, &di->default_rxconf, sizeof(*rxconf_i));
+
+       /*
+        * Turn off descriptor prefetch and writeback by default for all
+        * slave devices. Applications may tweak this setting if need be.
+        */
+       rxconf_i->rx_thresh.pthresh = 0;
+       rxconf_i->rx_thresh.hthresh = 0;
+       rxconf_i->rx_thresh.wthresh = 0;
+
+       /* Setting this to zero should effectively enable default values */
+       rxconf_i->rx_free_thresh = 0;
+
+       /* Disable deferred start by default for all slave devices */
+       rxconf_i->rx_deferred_start = 0;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_tx_first(struct bond_dev_private *internals,
+                                        const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_txconf *txconf_i = &internals->default_txconf;
+
+       /* Inherit Tx offload capabilities from the first slave device */
+       internals->tx_offload_capa = di->tx_offload_capa;
+       internals->tx_queue_offload_capa = di->tx_queue_offload_capa;
+
+       /* Inherit default Tx queue settings from the first slave device */
+       memcpy(txconf_i, &di->default_txconf, sizeof(*txconf_i));
+
+       /*
+        * Turn off descriptor prefetch and writeback by default for all
+        * slave devices. Applications may tweak this setting if need be.
+        */
+       txconf_i->tx_thresh.pthresh = 0;
+       txconf_i->tx_thresh.hthresh = 0;
+       txconf_i->tx_thresh.wthresh = 0;
+
+       /*
+        * Setting these parameters to zero assumes that default
+        * values will be configured implicitly by slave devices.
+        */
+       txconf_i->tx_free_thresh = 0;
+       txconf_i->tx_rs_thresh = 0;
+
+       /* Disable deferred start by default for all slave devices */
+       txconf_i->tx_deferred_start = 0;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_rx_next(struct bond_dev_private *internals,
+                                       const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf;
+       const struct rte_eth_rxconf *rxconf = &di->default_rxconf;
+
+       internals->rx_offload_capa &= di->rx_offload_capa;
+       internals->rx_queue_offload_capa &= di->rx_queue_offload_capa;
+       internals->flow_type_rss_offloads &= di->flow_type_rss_offloads;
+
+       /*
+        * If at least one slave device suggests enabling this
+        * setting by default, enable it for all slave devices
+        * since disabling it may not necessarily be supported.
+        */
+       if (rxconf->rx_drop_en == 1)
+               rxconf_i->rx_drop_en = 1;
+
+       /*
+        * Adding a new slave device may cause some of the previously inherited
+        * offloads to be withdrawn from the internal rx_queue_offload_capa
+        * value. Thus, the new internal value of default Rx queue offloads
+        * has to be masked by rx_queue_offload_capa to make sure that only
+        * commonly supported offloads are preserved from both the previous
+        * value and the value being inherited from the new slave device.
+        */
+       rxconf_i->offloads = (rxconf_i->offloads | rxconf->offloads) &
+                            internals->rx_queue_offload_capa;
+
+       /*
+        * RETA size is the GCD of all slave RETA sizes; if every size is a
+        * power of 2, the smaller one is the GCD.
+        */
+       if (internals->reta_size > di->reta_size)
+               internals->reta_size = di->reta_size;
+
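+       /*
+        * Unless a maximum Rx packet length has already been fixed for the
+        * bonded device, keep the smallest limit reported by any slave as
+        * the candidate value.
+        */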
+       if (!internals->max_rx_pktlen &&
+           di->max_rx_pktlen < internals->candidate_max_rx_pktlen)
+               internals->candidate_max_rx_pktlen = di->max_rx_pktlen;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_tx_next(struct bond_dev_private *internals,
+                                       const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_txconf *txconf_i = &internals->default_txconf;
+       const struct rte_eth_txconf *txconf = &di->default_txconf;
+
+       internals->tx_offload_capa &= di->tx_offload_capa;
+       internals->tx_queue_offload_capa &= di->tx_queue_offload_capa;
+
+       /*
+        * Adding a new slave device may cause some of the previously inherited
+        * offloads to be withdrawn from the internal tx_queue_offload_capa
+        * value. Thus, the new internal value of default Tx queue offloads
+        * has to be masked by tx_queue_offload_capa to make sure that only
+        * commonly supported offloads are preserved from both the previous
+        * value and the value being inherited from the new slave device.
+        */
+       txconf_i->offloads = (txconf_i->offloads | txconf->offloads) &
+                            internals->tx_queue_offload_capa;
+}
+
+static void
+eth_bond_slave_inherit_desc_lim_first(struct rte_eth_desc_lim *bond_desc_lim,
+               const struct rte_eth_desc_lim *slave_desc_lim)
+{
+       memcpy(bond_desc_lim, slave_desc_lim, sizeof(*bond_desc_lim));
+}
+
+static int
+eth_bond_slave_inherit_desc_lim_next(struct rte_eth_desc_lim *bond_desc_lim,
+               const struct rte_eth_desc_lim *slave_desc_lim)
+{
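+       /*
+        * Merge the limits so that the bonded device only reports values
+        * every slave can satisfy: lower the maximum and raise the minimum
+        * and alignment requirements.
+        */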
+       bond_desc_lim->nb_max = RTE_MIN(bond_desc_lim->nb_max,
+                                       slave_desc_lim->nb_max);
+       bond_desc_lim->nb_min = RTE_MAX(bond_desc_lim->nb_min,
+                                       slave_desc_lim->nb_min);
+       bond_desc_lim->nb_align = RTE_MAX(bond_desc_lim->nb_align,
+                                         slave_desc_lim->nb_align);
+
+       if (bond_desc_lim->nb_min > bond_desc_lim->nb_max ||
+           bond_desc_lim->nb_align > bond_desc_lim->nb_max) {
+               RTE_BOND_LOG(ERR, "Failed to inherit descriptor limits");
+               return -EINVAL;
+       }
+
+       /* Treat maximum number of segments equal to 0 as unspecified */
+       if (slave_desc_lim->nb_seg_max != 0 &&
+           (bond_desc_lim->nb_seg_max == 0 ||
+            slave_desc_lim->nb_seg_max < bond_desc_lim->nb_seg_max))
+               bond_desc_lim->nb_seg_max = slave_desc_lim->nb_seg_max;
+       if (slave_desc_lim->nb_mtu_seg_max != 0 &&
+           (bond_desc_lim->nb_mtu_seg_max == 0 ||
+            slave_desc_lim->nb_mtu_seg_max < bond_desc_lim->nb_mtu_seg_max))
+               bond_desc_lim->nb_mtu_seg_max = slave_desc_lim->nb_mtu_seg_max;
+
+       return 0;
+}
+
 static int
 __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
 {
@@ -259,12 +476,13 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
        if (internals->slave_count < 1) {
                /* if MAC is not user defined then use MAC of first slave add to
                 * bonded device */
-               if (!internals->user_defined_mac)
-                       mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs);
-
-               /* Inherit eth dev link properties from first slave */
-               link_properties_set(bonded_eth_dev,
-                               &(slave_eth_dev->data->dev_link));
+               if (!internals->user_defined_mac) {
+                       if (mac_address_set(bonded_eth_dev,
+                                           slave_eth_dev->data->mac_addrs)) {
+                               RTE_BOND_LOG(ERR, "Failed to set MAC address");
+                               return -1;
+                       }
+               }
 
                /* Make primary slave */
                internals->primary_port = slave_port_id;
@@ -274,43 +492,46 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
                internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
                internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
 
-               internals->reta_size = dev_info.reta_size;
-
-               /* Take the first dev's offload capabilities */
-               internals->rx_offload_capa = dev_info.rx_offload_capa;
-               internals->tx_offload_capa = dev_info.tx_offload_capa;
-               internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
-
-               /* Inherit first slave's max rx packet size */
-               internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen;
+               eth_bond_slave_inherit_dev_info_rx_first(internals, &dev_info);
+               eth_bond_slave_inherit_dev_info_tx_first(internals, &dev_info);
 
+               eth_bond_slave_inherit_desc_lim_first(&internals->rx_desc_lim,
+                                                     &dev_info.rx_desc_lim);
+               eth_bond_slave_inherit_desc_lim_first(&internals->tx_desc_lim,
+                                                     &dev_info.tx_desc_lim);
        } else {
-               internals->rx_offload_capa &= dev_info.rx_offload_capa;
-               internals->tx_offload_capa &= dev_info.tx_offload_capa;
-               internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
-
-               if (link_properties_valid(bonded_eth_dev,
-                               &slave_eth_dev->data->dev_link) != 0) {
-                       RTE_BOND_LOG(ERR, "Invalid link properties for slave %d"
-                                       " in bonding mode %d", slave_port_id,
-                                       internals->mode);
-                       return -1;
-               }
+               int ret;
 
-               /* RETA size is GCD of all slaves RETA sizes, so, if all sizes will be
-                * the power of 2, the lower one is GCD
-                */
-               if (internals->reta_size > dev_info.reta_size)
-                       internals->reta_size = dev_info.reta_size;
+               eth_bond_slave_inherit_dev_info_rx_next(internals, &dev_info);
+               eth_bond_slave_inherit_dev_info_tx_next(internals, &dev_info);
+
+               ret = eth_bond_slave_inherit_desc_lim_next(
+                               &internals->rx_desc_lim, &dev_info.rx_desc_lim);
+               if (ret != 0)
+                       return ret;
 
-               if (!internals->max_rx_pktlen &&
-                   dev_info.max_rx_pktlen < internals->candidate_max_rx_pktlen)
-                       internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen;
+               ret = eth_bond_slave_inherit_desc_lim_next(
+                               &internals->tx_desc_lim, &dev_info.tx_desc_lim);
+               if (ret != 0)
+                       return ret;
        }
 
        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
                        internals->flow_type_rss_offloads;
 
+       if (slave_rte_flow_prepare(internals->slave_count, internals) != 0) {
+               RTE_BOND_LOG(ERR, "Failed to prepare new slave flows: port=%d",
+                            slave_port_id);
+               return -1;
+       }
+
+       /* Add additional MAC addresses to the slave */
+       if (slave_add_mac_addresses(bonded_eth_dev, slave_port_id) != 0) {
+               RTE_BOND_LOG(ERR, "Failed to add mac address(es) to slave %hu",
+                               slave_port_id);
+               return -1;
+       }
+
        internals->slave_count++;
 
        if (bonded_eth_dev->data->dev_started) {
@@ -325,7 +546,7 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
        /* Add slave details to bonded device */
        slave_eth_dev->data->dev_flags |= RTE_ETH_DEV_BONDED_SLAVE;
 
-       /* Update all slave devices MACs*/
+       /* Update all slave devices MACs */
        mac_address_slaves_update(bonded_eth_dev);
 
        /* Register link status change callback with bonded device pointer as
@@ -343,11 +564,6 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
                            !internals->user_defined_primary_port)
                                bond_ethdev_primary_set(internals,
                                                        slave_port_id);
-
-                       if (find_slave_by_id(internals->active_slaves,
-                                            internals->active_slave_count,
-                                            slave_port_id) == internals->active_slave_count)
-                               activate_slave(bonded_eth_dev, slave_port_id);
                }
        }
 
@@ -388,6 +604,8 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id,
        struct rte_eth_dev *bonded_eth_dev;
        struct bond_dev_private *internals;
        struct rte_eth_dev *slave_eth_dev;
+       struct rte_flow_error flow_error;
+       struct rte_flow *flow;
        int i, slave_idx;
 
        bonded_eth_dev = &rte_eth_devices[bonded_port_id];
@@ -427,6 +645,21 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id,
        rte_eth_dev_default_mac_addr_set(slave_port_id,
                        &(internals->slaves[slave_idx].persisted_mac_addr));
 
+       /* remove additional MAC addresses from the slave */
+       slave_remove_mac_addresses(bonded_eth_dev, slave_port_id);
+
+       /*
+        * Remove bond device flows from slave device.
+        * Note: don't restore flow isolate mode.
+        */
+       TAILQ_FOREACH(flow, &internals->flow_list, next) {
+               if (flow->flows[slave_idx] != NULL) {
+                       rte_flow_destroy(slave_port_id, flow->flows[slave_idx],
+                                        &flow_error);
+                       flow->flows[slave_idx] = NULL;
+               }
+       }
+
        slave_eth_dev = &rte_eth_devices[slave_port_id];
        slave_remove(internals, slave_eth_dev);
        slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE);
@@ -453,6 +686,8 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id,
        if (internals->slave_count == 0) {
                internals->rx_offload_capa = 0;
                internals->tx_offload_capa = 0;
+               internals->rx_queue_offload_capa = 0;
+               internals->tx_queue_offload_capa = 0;
                internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
                internals->reta_size = 0;
                internals->candidate_max_rx_pktlen = 0;
@@ -638,9 +873,21 @@ rte_eth_bond_mac_address_reset(uint16_t bonded_port_id)
        internals->user_defined_mac = 0;
 
        if (internals->slave_count > 0) {
+               int slave_port;
+               /* Look up the primary slave's array index from its port
+                * number: slave_add() stores slaves in insertion order, so
+                * the primary port number is not itself a valid array index.
+                */
+               for (slave_port = 0; slave_port < internals->slave_count;
+                    slave_port++) {
+                       if (internals->slaves[slave_port].port_id ==
+                           internals->primary_port)
+                               break;
+               }
+
                /* Set MAC Address of Bonded Device */
                if (mac_address_set(bonded_eth_dev,
-                               &internals->slaves[internals->primary_port].persisted_mac_addr)
+                       &internals->slaves[slave_port].persisted_mac_addr)
                                != 0) {
                        RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device");
                        return -1;
@@ -665,12 +912,15 @@ rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy)
        switch (policy) {
        case BALANCE_XMIT_POLICY_LAYER2:
                internals->balance_xmit_policy = policy;
+               internals->burst_xmit_hash = burst_xmit_l2_hash;
                break;
        case BALANCE_XMIT_POLICY_LAYER23:
                internals->balance_xmit_policy = policy;
+               internals->burst_xmit_hash = burst_xmit_l23_hash;
                break;
        case BALANCE_XMIT_POLICY_LAYER34:
                internals->balance_xmit_policy = policy;
+               internals->burst_xmit_hash = burst_xmit_l34_hash;
                break;
 
        default: