net/bonding: fix Rx slave fairness
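The Rx changes below make the bonding PMD remember, in internals->active_slave, which slave to poll first on the next rx_burst call and advance that position by one after every burst, instead of always starting from the first active slave (or, in the 802.3ad paths, from wherever the previous loop happened to stop). The first slave in the list therefore no longer gets first claim on the whole burst budget on every call. Below is a minimal standalone sketch of that resume-and-rotate pattern; the names (fair_rx_ctx, poll_port, fair_rx_burst) are hypothetical, and poll_port() merely stands in for rte_eth_rx_burst() on one slave port.

/* Sketch of the round-robin "start from the saved slave, then rotate"
 * Rx pattern used by the hunks below; not the driver code itself. */
#include <stdint.h>

struct fair_rx_ctx {
        uint16_t slave_count;   /* number of active slave ports */
        uint16_t active_slave;  /* slave to poll first on the next burst */
};

/* Stand-in for rte_eth_rx_burst() on a single slave port. */
static uint16_t
poll_port(uint16_t slave, void **pkts, uint16_t max_pkts)
{
        (void)slave; (void)pkts; (void)max_pkts;
        return 0;
}

uint16_t
fair_rx_burst(struct fair_rx_ctx *ctx, void **pkts, uint16_t nb_pkts)
{
        uint16_t slave = ctx->active_slave;
        uint16_t total = 0;
        uint16_t i;

        for (i = 0; i < ctx->slave_count && nb_pkts > 0; i++) {
                uint16_t n = poll_port(slave, pkts + total, nb_pkts);

                total += n;
                nb_pkts -= n;
                if (++slave == ctx->slave_count)
                        slave = 0;
        }

        /* Rotate the saved starting point so every slave periodically
         * gets first access to the burst budget. */
        if (++ctx->active_slave == ctx->slave_count)
                ctx->active_slave = 0;

        return total;
}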
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index ca74eb0..86e78bd 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -37,7 +37,8 @@ get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
 {
        size_t vlan_offset = 0;
 
-       if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
+       if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
+               rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
 
                vlan_offset = sizeof(struct vlan_hdr);
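For context, get_vlan_offset() is used where the PMD needs to look past VLAN tags, e.g. when hashing packets across slaves; the added condition lets it recognize an 802.1ad outer tag (ETHER_TYPE_QINQ, 0x88a8) as well as a plain 802.1Q tag (0x8100). A tiny self-contained illustration of the frame layout involved follows; the struct names are made up for the example and are not DPDK's ether_hdr/vlan_hdr.

/* Offsets in a double-tagged (QinQ) Ethernet frame. Header sizes are the
 * standard ones; the struct names are illustrative only. */
#include <stdint.h>
#include <stdio.h>

struct eth_header { uint8_t dst[6], src[6]; uint16_t ether_type; }; /* 14 B */
struct vlan_tag   { uint16_t tci; uint16_t inner_type; };           /*  4 B */

int main(void)
{
        /* An 0x88a8 outer tag followed by an 0x8100 inner tag adds two
         * 4-byte VLAN headers, so the L3 header starts 14 + 8 = 22 bytes
         * into the frame. */
        printf("L3 offset: %zu bytes\n",
               sizeof(struct eth_header) + 2 * sizeof(struct vlan_tag));
        return 0;
}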
@@ -57,28 +58,34 @@ bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
        struct bond_dev_private *internals;
 
-       uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;
-
+       uint16_t slave_count;
+       uint16_t active_slave;
        int i;
 
        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
-
        internals = bd_rx_q->dev_private;
+       slave_count = internals->active_slave_count;
+       active_slave = internals->active_slave;
 
+       for (i = 0; i < slave_count && nb_pkts; i++) {
+               uint16_t num_rx_slave;
 
-       for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
-               num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
-                               bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
-               if (num_rx_slave) {
-                       num_rx_total += num_rx_slave;
-                       nb_pkts -= num_rx_slave;
-               }
+               num_rx_slave =
+                       rte_eth_rx_burst(internals->active_slaves[active_slave],
+                                        bd_rx_q->queue_id,
+                                        bufs + num_rx_total, nb_pkts);
+               num_rx_total += num_rx_slave;
+               nb_pkts -= num_rx_slave;
+               if (++active_slave == slave_count)
+                       active_slave = 0;
        }
 
+       if (++internals->active_slave == slave_count)
+               internals->active_slave = 0;
        return num_rx_total;
 }
 
@@ -257,25 +264,32 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
-
-       uint16_t i, idx;
+       uint16_t active_slave;
+       uint16_t i;
 
        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
+       active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);
 
-       for (i = 0, idx = internals->active_slave;
-                       i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
-               idx = idx % slave_count;
+       for (i = 0; i < slave_count && nb_pkts; i++) {
+               uint16_t num_rx_slave;
 
                /* Read packets from this slave */
-               num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
-                               &bufs[num_rx_total], nb_pkts - num_rx_total);
+               num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
+                                               bd_rx_q->queue_id,
+                                               bufs + num_rx_total, nb_pkts);
+               num_rx_total += num_rx_slave;
+               nb_pkts -= num_rx_slave;
+
+               if (++active_slave == slave_count)
+                       active_slave = 0;
        }
 
-       internals->active_slave = idx;
+       if (++internals->active_slave == slave_count)
+               internals->active_slave = 0;
 
        return num_rx_total;
 }
@@ -300,10 +314,10 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       uint16_t i, j;
+       uint16_t i;
 
        if (unlikely(nb_bufs == 0))
                return 0;
@@ -358,34 +372,12 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 
                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
-                       slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                       int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
-                       total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       total_tx_fail_count += slave_tx_fail_count;
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count * sizeof(bufs[0]));
                }
        }
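The rewritten failure handling above (and the two identical blocks later in this diff) drops the per-slave fail-count array and the separate reordering pass, and instead copies whatever a slave failed to send straight to the tail of the caller's bufs[] array with one memcpy; that is where callers of a tx_burst function expect un-sent mbufs to remain, and the copy starts at slave_tx_count (the removed loop started at slave_tx_count - 1). A minimal sketch of that tail move, with illustrative names whose parameters mirror the driver's locals for a single slave:

/* Copy one slave's unsent mbufs to the tail of the caller's array in a
 * single memcpy, as the hunk above does inside its per-slave loop. */
#include <stdint.h>
#include <string.h>

void
move_unsent_to_tail(void **bufs, uint16_t nb_bufs,
                    void **slave_bufs, uint16_t slave_nb_bufs,
                    uint16_t slave_tx_count, uint16_t *total_tx_fail_count)
{
        if (slave_tx_count < slave_nb_bufs) {
                uint16_t fail = slave_nb_bufs - slave_tx_count;

                *total_tx_fail_count += fail;
                /* Failed mbufs accumulate at the back of bufs[], so the
                 * burst still returns sent packets at the front and the
                 * caller can retry bufs[ret .. nb_bufs). */
                memcpy(&bufs[nb_bufs - *total_tx_fail_count],
                       &slave_bufs[slave_tx_count],
                       (size_t)fail * sizeof(bufs[0]));
        }
}

Because the entries of slave_bufs[] past slave_tx_count are already in order, one contiguous copy per slave is enough and no second pass over all slaves is needed.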
 
@@ -480,7 +472,9 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                        idx = 0;
        }
 
-       internals->active_slave = idx;
+       if (++internals->active_slave == slave_count)
+               internals->active_slave = 0;
+
        return num_rx_total;
 }
 
@@ -715,8 +709,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                                tx_fail_total += tx_fail_slave;
 
                                memcpy(&bufs[nb_pkts - tx_fail_total],
-                                               &slave_bufs[i][num_tx_slave],
-                                               tx_fail_slave * sizeof(bufs[0]));
+                                      &slave_bufs[i][num_tx_slave],
+                                      tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
@@ -1221,10 +1215,10 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];
 
-       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       uint16_t i, j;
+       uint16_t i;
 
        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1265,34 +1259,12 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 
                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
-                       slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                       int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
-                       total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       total_tx_fail_count += slave_tx_fail_count;
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count * sizeof(bufs[0]));
                }
        }
 
@@ -1319,10 +1291,10 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       uint16_t i, j;
+       uint16_t i;
 
        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1380,39 +1352,13 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 
                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
-                               slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                               int slave_tx_fail_count = slave_nb_bufs[i] -
                                                slave_tx_count;
-                               total_tx_fail_count += slave_tx_fail_count[i];
-
-                               /*
-                                * Shift bufs to beginning of array to allow
-                                * reordering later
-                                */
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       slave_bufs[i][j] =
-                                               slave_bufs[i]
-                                                       [(slave_tx_count - 1)
-                                                       + j];
-                       }
-               }
+                               total_tx_fail_count += slave_tx_fail_count;
 
-               /*
-                * If there are tx burst failures we move packets to end of
-                * bufs to preserve expected PMD behaviour of all failed
-                * transmitted being at the end of the input mbuf array
-                */
-               if (unlikely(total_tx_fail_count > 0)) {
-                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-                       for (i = 0; i < slave_count; i++) {
-                               if (slave_tx_fail_count[i] > 0) {
-                                       for (j = 0;
-                                               j < slave_tx_fail_count[i];
-                                               j++) {
-                                               bufs[bufs_idx++] =
-                                                       slave_bufs[i][j];
-                                       }
-                               }
+                               memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                                      &slave_bufs[i][slave_tx_count],
+                                      slave_tx_fail_count * sizeof(bufs[0]));
                        }
                }
        }
@@ -1847,12 +1793,11 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
 
        /* If RSS is enabled for bonding, try to enable it for slaves  */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
-               if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
-                               != 0) {
+               if (internals->rss_key_len != 0) {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
-                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+                                       internals->rss_key_len;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
-                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+                                       internals->rss_key;
                } else {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                }
@@ -2732,6 +2677,17 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
                        mac_address_slaves_update(bonded_eth_dev);
                }
 
+               /* Check link state properties if bonded link is up */
+               if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
+                       if (link_properties_valid(bonded_eth_dev, &link) != 0)
+                               RTE_BOND_LOG(ERR, "Invalid link properties "
+                                            "for slave %d in bonding mode %d",
+                                            port_id, internals->mode);
+               } else {
+                       /* inherit slave link properties */
+                       link_properties_set(bonded_eth_dev, &link);
+               }
+
                activate_slave(bonded_eth_dev, port_id);
 
                /* If user has defined the primary port then default to using it */
@@ -3342,16 +3298,30 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
 
        unsigned i, j;
 
-       /* If RSS is enabled, fill table and key with default values */
+       /*
+        * If RSS is enabled, fill table with default values and
+        * set key to the value specified in port RSS configuration.
+        * Fall back to default RSS key if the key is not specified
+        */
        if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
-               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
-               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
-               memcpy(internals->rss_key, default_rss_key, 40);
+               if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
+                       internals->rss_key_len =
+                               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+                       memcpy(internals->rss_key,
+                              dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
+                              internals->rss_key_len);
+               } else {
+                       internals->rss_key_len = sizeof(default_rss_key);
+                       memcpy(internals->rss_key, default_rss_key,
+                              internals->rss_key_len);
+               }
 
                for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
                        internals->reta_conf[i].mask = ~0LL;
                        for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
-                               internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+                               internals->reta_conf[i].reta[j] =
+                                               (i * RTE_RETA_GROUP_SIZE + j) %
+                                               dev->data->nb_rx_queues;
                }
        }
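The final hunk does two things: the bonded device now stores its RSS key in internals (taking the key from the port configuration when the application supplies one, otherwise falling back to default_rss_key), and the default redirection table is filled with (i * RTE_RETA_GROUP_SIZE + j) % nb_rx_queues, so the queue sequence continues across the 64-entry groups instead of restarting at queue 0 in every group. A small self-contained example of what the formula change means for the table contents; the constants (2 groups, 3 Rx queues) are chosen only for illustration.

/* Print a few entries of each RETA group under the old and new formulas.
 * GROUP_SIZE matches RTE_RETA_GROUP_SIZE; the queue count is arbitrary. */
#include <stdio.h>

#define GROUP_SIZE 64

int main(void)
{
        unsigned int nb_rx_queues = 3, nb_groups = 2, i, j;

        for (i = 0; i < nb_groups; i++)
                for (j = 0; j < 4; j++) /* first entries of each group */
                        printf("entry %3u: old queue %u, new queue %u\n",
                               i * GROUP_SIZE + j,
                               j % nb_rx_queues,
                               (i * GROUP_SIZE + j) % nb_rx_queues);
        return 0;
}

With three queues the old formula starts every group on queue 0 (entries 0, 64, 128, ...), giving the lower-numbered queues a slightly larger share whenever the queue count does not divide 64; the new formula spreads the entries evenly across the whole table.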