* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-
+#include <stdlib.h>
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
+#include <rte_cycles.h>
#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad_private.h"
+
+#define REORDER_PERIOD_MS 10
+/* Table for statistics in mode 5 TLB */
+static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
bd_rx_q->queue_id, bufs, nb_pkts);
}
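+
+/* Receive burst for mode 4 (802.3ad). Reads packets from each active slave,
+ * passes slow protocol (LACP) frames to the mode 4 state machines and frees
+ * frames received on non-collecting slaves or, when the bonded device is not
+ * in promiscuous mode, frames not addressed to it. */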
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+ uint16_t nb_pkts)
+{
+ /* Cast to structure containing the bonded device's port id and queue id */
+ struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+ struct bond_dev_private *internals = bd_rx_q->dev_private;
+ struct ether_addr bond_mac;
+
+ struct ether_hdr *hdr;
+
+ const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
+ uint16_t num_rx_total = 0; /* Total number of received packets */
+ uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint8_t slave_count;
+
+ uint8_t collecting; /* current slave collecting status */
+ const uint8_t promisc = internals->promiscuous_en;
+ uint8_t i, j, k;
+
+ rte_eth_macaddr_get(internals->port_id, &bond_mac);
+ /* Copy slave list to protect against slave up/down changes during rx
+ * bursting */
+ slave_count = internals->active_slave_count;
+ memcpy(slaves, internals->active_slaves,
+ sizeof(internals->active_slaves[0]) * slave_count);
+
+ for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
+ j = num_rx_total;
+ collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
+
+ /* Read packets from this slave */
+ num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+ &bufs[num_rx_total], nb_pkts - num_rx_total);
+
+ for (k = j; k < 2 && k < num_rx_total; k++)
+ rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
+
+ /* Handle slow protocol packets. */
+ while (j < num_rx_total) {
+ if (j + 3 < num_rx_total)
+ rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
+
+ hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+ /* Remove packet from array if it is a slow packet, the slave is not
+ * in collecting state, or the bonding interface is not in promiscuous
+ * mode and the packet destination address does not match. */
+ if (unlikely(hdr->ether_type == ether_type_slow_be ||
+ !collecting || (!promisc &&
+ !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
+
+ if (hdr->ether_type == ether_type_slow_be) {
+ bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
+ bufs[j]);
+ } else
+ rte_pktmbuf_free(bufs[j]);
+
+ /* Packet is managed by mode 4 or dropped, shift the array */
+ num_rx_total--;
+ if (j < num_rx_total) {
+ memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
+ (num_rx_total - j));
+ }
+ } else
+ j++;
+ }
+ }
+
+ return num_rx_total;
+}
+
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
tx_fail_total += tx_fail_slave;
memcpy(&bufs[nb_pkts - tx_fail_total],
- &slave_bufs[i][num_tx_slave], tx_fail_slave * sizeof(bufs[0]));
+ &slave_bufs[i][num_tx_slave],
+ tx_fail_slave * sizeof(bufs[0]));
}
num_tx_total += num_tx_slave;
}
return hash % slave_count;
}
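+
+/* Per-slave bandwidth state used to order slaves in mode 5 (TLB). The
+ * bwg_left_int/bwg_left_remainder pair describes the bandwidth still left
+ * on the slave since the last statistics update. */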
+struct bwg_slave {
+ uint64_t bwg_left_int;
+ uint64_t bwg_left_remainder;
+ uint8_t slave;
+};
+
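+/* qsort() comparator: orders slaves by descending remaining bandwidth so the
+ * least loaded slave is used first in mode 5 (TLB). */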
+static int
+bandwidth_cmp(const void *a, const void *b)
+{
+ const struct bwg_slave *bwg_a = a;
+ const struct bwg_slave *bwg_b = b;
+ int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
+ int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
+ (int64_t)bwg_a->bwg_left_remainder;
+ if (diff > 0)
+ return 1;
+ else if (diff < 0)
+ return -1;
+ else if (diff2 > 0)
+ return 1;
+ else if (diff2 < 0)
+ return -1;
+ else
+ return 0;
+}
+
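+/* Estimate the bandwidth left on the slave's link, based on the bytes
+ * transmitted (load) since the last statistics update. */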
+static void
+bandwidth_left(int port_id, uint64_t load, uint8_t update_idx,
+ struct bwg_slave *bwg_slave)
+{
+ struct rte_eth_link link_status;
+
+ rte_eth_link_get(port_id, &link_status);
+ uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
+ if (link_bwg == 0)
+ return;
+ link_bwg = (link_bwg * (update_idx+1) * REORDER_PERIOD_MS);
+ bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
+ bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
+}
+
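+/* Periodic alarm callback for mode 5 (TLB): recomputes each active slave's
+ * remaining bandwidth, re-orders the active slave list accordingly and
+ * re-arms itself every REORDER_PERIOD_MS milliseconds. */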
+static void
+bond_ethdev_update_tlb_slave_cb(void *arg)
+{
+ struct bond_dev_private *internals = arg;
+ struct rte_eth_stats slave_stats;
+ struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
+ uint8_t slave_count;
+ uint64_t tx_bytes;
+
+ uint8_t update_stats = 0;
+ uint8_t i, slave_id;
+
+ internals->slave_update_idx++;
+
+ if (internals->slave_update_idx >= REORDER_PERIOD_MS)
+ update_stats = 1;
+
+ for (i = 0; i < internals->active_slave_count; i++) {
+ slave_id = internals->active_slaves[i];
+ rte_eth_stats_get(slave_id, &slave_stats);
+ tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
+ bandwidth_left(slave_id, tx_bytes,
+ internals->slave_update_idx, &bwg_array[i]);
+ bwg_array[i].slave = slave_id;
+
+ if (update_stats)
+ tlb_last_obytets[slave_id] = slave_stats.obytes;
+ }
+
+ if (update_stats == 1)
+ internals->slave_update_idx = 0;
+
+ slave_count = i;
+ qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
+ for (i = 0; i < slave_count; i++)
+ internals->active_slaves[i] = bwg_array[i].slave;
+
+ rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
+ (struct bond_dev_private *)internals);
+}
+
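+/* Transmit burst for mode 5 (TLB): rewrites the source MAC of frames carrying
+ * the primary slave's address to the address of the slave actually used for
+ * transmission, then spreads the burst over the bandwidth-ordered slaves. */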
+static uint16_t
+bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+ struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+ struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+ struct rte_eth_dev *primary_port =
+ &rte_eth_devices[internals->primary_port];
+ uint16_t num_tx_total = 0;
+ uint8_t i, j;
+
+ uint8_t num_of_slaves = internals->active_slave_count;
+ uint8_t slaves[RTE_MAX_ETHPORTS];
+
+ struct ether_hdr *ether_hdr;
+ struct ether_addr primary_slave_addr;
+ struct ether_addr active_slave_addr;
+
+ if (num_of_slaves < 1)
+ return num_tx_total;
+
+ memcpy(slaves, internals->active_slaves,
+ sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+ ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
+
+ if (nb_pkts > 3) {
+ for (i = 0; i < 3; i++)
+ rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
+ }
+
+ for (i = 0; i < num_of_slaves; i++) {
+ ether_addr_copy(&internals->slaves[slaves[i]].persisted_mac_addr,
+ &active_slave_addr);
+
+ for (j = num_tx_total; j < nb_pkts; j++) {
+ if (j + 3 < nb_pkts)
+ rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
+
+ ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+ if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
+ ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
+ }
+
+ num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+ bufs + num_tx_total, nb_pkts - num_tx_total);
+
+ if (num_tx_total == nb_pkts)
+ break;
+ }
+
+ return num_tx_total;
+}
+
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
tx_fail_total += slave_tx_fail_count;
- memcpy(bufs[nb_pkts - tx_fail_total],
- slave_bufs[i][num_tx_slave], slave_tx_fail_count);
+ memcpy(&bufs[nb_pkts - tx_fail_total],
+ &slave_bufs[i][num_tx_slave],
+ slave_tx_fail_count * sizeof(bufs[0]));
}
num_tx_total += num_tx_slave;
}
}
+ return num_tx_total;
+}
+
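+/* Transmit burst for mode 4 (802.3ad): drains LACP/marker frames queued by
+ * the state machines for each slave, then distributes the data packets over
+ * the slaves that are currently in DISTRIBUTING state. */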
+static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+ uint16_t nb_pkts)
+{
+ struct bond_dev_private *internals;
+ struct bond_tx_queue *bd_tx_q;
+
+ uint8_t num_of_slaves;
+ uint8_t slaves[RTE_MAX_ETHPORTS];
+ /* positions in the slaves array, not port IDs */
+ uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
+ uint8_t distributing_count;
+
+ uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
+ uint16_t i, j, op_slave_idx;
+ const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
+
+ /* Allocate extra space for slow packets queued by the 802.3ad mode. */
+ struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
+ void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
+
+ /* Total number of packets in each slave_bufs array */
+ uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+ /* Number of slow packets placed in each slave's buffer */
+ uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+ bd_tx_q = (struct bond_tx_queue *)queue;
+ internals = bd_tx_q->dev_private;
+
+ /* Copy slave list to protect against slave up/down changes during tx
+ * bursting */
+ num_of_slaves = internals->active_slave_count;
+ if (num_of_slaves < 1)
+ return num_tx_total;
+
+ memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+
+ distributing_count = 0;
+ for (i = 0; i < num_of_slaves; i++) {
+ struct port *port = &mode_8023ad_ports[slaves[i]];
+
+ slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
+ slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
+ slave_nb_pkts[i] = slave_slow_nb_pkts[i];
+
+ for (j = 0; j < slave_slow_nb_pkts[i]; j++)
+ slave_bufs[i][j] = slow_pkts[j];
+
+ if (ACTOR_STATE(port, DISTRIBUTING))
+ distributing_offsets[distributing_count++] = i;
+ }
+
+ if (likely(distributing_count > 0)) {
+ /* Populate each slave's mbuf array with the packets to be sent on it */
+ for (i = 0; i < nb_pkts; i++) {
+ /* Select output slave using hash based on xmit policy */
+ op_slave_idx = xmit_slave_hash(bufs[i], distributing_count,
+ internals->balance_xmit_policy);
+
+ /* Populate slave mbuf arrays with mbufs for that slave. Use only
+ * slaves that are currently distributing. */
+ uint8_t slave_offset = distributing_offsets[op_slave_idx];
+ slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
+ slave_nb_pkts[slave_offset]++;
+ }
+ }
+
+ /* Send packet burst on each slave device */
+ for (i = 0; i < num_of_slaves; i++) {
+ if (slave_nb_pkts[i] == 0)
+ continue;
+
+ num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+ slave_bufs[i], slave_nb_pkts[i]);
+
+ /* If tx burst failed, free the slow packets that were not sent */
+ for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
+ rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
+
+ num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
+ num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+
+ /* If tx burst failed, move the unsent data packets to the end of bufs */
+ if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+ uint16_t j = nb_pkts - num_tx_fail_total;
+ for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
+ bufs[j] = slave_bufs[i][num_tx_slave];
+ }
+ }
return num_tx_total;
}
return 0;
}
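+/* Copy the bonded device's current MAC address into dst_mac_addr. */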
+int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
+{
+ struct ether_addr *mac_addr;
+
+ if (eth_dev == NULL) {
+ RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+ return -1;
+ }
+
+ if (dst_mac_addr == NULL) {
+ RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+ return -1;
+ }
+
+ mac_addr = eth_dev->data->mac_addrs;
+
+ ether_addr_copy(mac_addr, dst_mac_addr);
+ return 0;
+}
+
int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
mac_addr = eth_dev->data->mac_addrs;
if (eth_dev == NULL) {
- RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
+ RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
return -1;
}
}
}
break;
+ case BONDING_MODE_8023AD:
+ bond_mode_8023ad_mac_address_update(bonded_eth_dev);
+ break;
case BONDING_MODE_ACTIVE_BACKUP:
+ case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
default:
for (i = 0; i < internals->slave_count; i++) {
if (internals->slaves[i].port_id ==
eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
break;
#endif
+ case BONDING_MODE_8023AD:
+ if (bond_mode_8023ad_enable(eth_dev) != 0)
+ return -1;
+
+ eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+ eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+ RTE_BOND_LOG(WARNING,
+ "Using mode 4, it is necessary to do TX burst and RX burst "
+ "at least every 100ms.");
+ break;
+ case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+ eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
+ eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
+ break;
default:
return -1;
}
}
slave_details->link_status_wait_to_complete = 0;
-
+ /* clear tlb_last_obytets when a port is added to the bonding device */
memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
sizeof(struct ether_addr));
}
if (internals->user_defined_primary_port)
bond_ethdev_primary_set(internals, internals->primary_port);
+ if (internals->mode == BONDING_MODE_8023AD)
+ bond_mode_8023ad_start(eth_dev);
+
+ if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING)
+ bond_ethdev_update_tlb_slave_cb(internals);
+
return 0;
}
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
struct bond_dev_private *internals = eth_dev->data->dev_private;
+ uint8_t i;
+
+ if (internals->mode == BONDING_MODE_8023AD) {
+ struct port *port;
+ void *pkt = NULL;
+
+ bond_mode_8023ad_stop(eth_dev);
+
+ /* Discard all messages to/from mode 4 state machines */
+ for (i = 0; i < internals->slave_count; i++) {
+ port = &mode_8023ad_ports[internals->slaves[i].port_id];
+
+ RTE_VERIFY(port->rx_ring != NULL);
+ while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
+ rte_pktmbuf_free(pkt);
+
+ RTE_VERIFY(port->tx_ring != NULL);
+ while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
+ rte_pktmbuf_free(pkt);
+ }
+ }
+
+ if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+ rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+ }
internals->active_slave_count = 0;
internals->link_status_polling_enabled = 0;
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
+ struct bond_dev_private *internals = dev->data->dev_private;
+
dev_info->driver_name = driver_name;
dev_info->max_mac_addrs = 1;
dev_info->min_rx_bufsize = 0;
dev_info->pci_dev = dev->pci_dev;
+
+ dev_info->rx_offload_capa = internals->rx_offload_capa;
+ dev_info->tx_offload_capa = internals->tx_offload_capa;
}
static int
0, dev->pci_dev->numa_node);
if (bd_tx_q == NULL)
- return -1;
+ return -1;
bd_tx_q->queue_id = tx_queue_id;
bd_tx_q->dev_private = dev->data->dev_private;
rte_free(queue);
}
-
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
for (i = 0; i < internals->slave_count; i++)
rte_eth_promiscuous_enable(internals->slaves[i].port_id);
break;
+ /* In mode 4, promiscuous mode is managed when a slave is added/removed */
+ case BONDING_MODE_8023AD:
+ break;
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
+ case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
default:
rte_eth_promiscuous_enable(internals->current_primary_port);
-
}
}
for (i = 0; i < internals->slave_count; i++)
rte_eth_promiscuous_disable(internals->slaves[i].port_id);
break;
+ /* In mode 4, promiscuous mode is managed when a slave is added/removed */
+ case BONDING_MODE_8023AD:
+ break;
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
+ case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
default:
rte_eth_promiscuous_disable(internals->current_primary_port);
}
struct bond_dev_private *internals;
struct rte_eth_link link;
- int i, valid_slave = 0, active_pos = -1;
+ int i, valid_slave = 0;
+ uint8_t active_pos;
uint8_t lsc_flag = 0;
if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
return;
/* Search for port in active port list */
- for (i = 0; i < internals->active_slave_count; i++) {
- if (port_id == internals->active_slaves[i]) {
- active_pos = i;
- break;
- }
- }
+ active_pos = find_slave_by_id(internals->active_slaves,
+ internals->active_slave_count, port_id);
rte_eth_link_get_nowait(port_id, &link);
if (link.link_status) {
- if (active_pos >= 0)
+ if (active_pos < internals->active_slave_count)
return;
/* if no active slave ports then set this port to be primary port */
link_properties_set(bonded_eth_dev,
&(slave_eth_dev->data->dev_link));
}
- internals->active_slaves[internals->active_slave_count++] = port_id;
+
+ activate_slave(bonded_eth_dev, port_id);
/* If user has defined the primary port then default to using it */
if (internals->user_defined_primary_port &&
internals->primary_port == port_id)
bond_ethdev_primary_set(internals, port_id);
} else {
- if (active_pos < 0)
+ if (active_pos == internals->active_slave_count)
return;
/* Remove from active slave list */
- for (i = active_pos; i < (internals->active_slave_count - 1); i++)
- internals->active_slaves[i] = internals->active_slaves[i+1];
-
- internals->active_slave_count--;
+ deactivate_slave(bonded_eth_dev, port_id);
/* No active slaves, change link status to down and reset other
* link properties */