bond: add mode 4
authorPawel Wodkowski <pawelx.wodkowski@intel.com>
Thu, 27 Nov 2014 18:01:10 +0000 (18:01 +0000)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Thu, 27 Nov 2014 20:20:58 +0000 (21:20 +0100)
This patch set adds support for dynamic link aggregation (mode 4) to the
librte_pmd_bond library. This mode provides auto negotiation/configuration
of peers as well as link status change monitoring using out of band
LACP (link aggregation control protocol) messages. For further details of
LACP specification see the IEEE 802.3ad/802.1AX standards. It is also
described here
https://www.kernel.org/doc/Documentation/networking/bonding.txt.

In this implementation we have an array of mode 4 settings for each slave.
There is also an assumption that there is one aggregator for every port (it
might be unused if a better one is found).

Difference in this implementation vs Linux implementation:
- this implementation is not directly based on state machines; instead the
  current state is calculated from actor and partner states (and other things too).

Some implementation details:
- during rx burst every packet is checked to see whether it is a LACP or marker
  packet. If it is a LACP frame it is passed to the mode 4 logic using the slave's
  rx ring and removed from the rx buffer before it is returned
- in tx burst, packets from mode 4 (if any) are injected into each slave.
- there is a timer running in the background to process/produce mode 4
  frames from the rx/to the tx functions.

Some requirements for this mode:
- for LACP mode to work rx and tx burst functions must be invoked
  at least in 100ms intervals
- the buffer provided to rx burst should be at least 2x the slave count in size.
  This is not required but might increase performance, especially during the
  initial handshake.

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Acked-by: Declan Doherty <declan.doherty@intel.com>
lib/librte_ether/rte_ether.h
lib/librte_pmd_bond/Makefile
lib/librte_pmd_bond/rte_eth_bond.h
lib/librte_pmd_bond/rte_eth_bond_8023ad.c [new file with mode: 0644]
lib/librte_pmd_bond/rte_eth_bond_8023ad.h [new file with mode: 0644]
lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h [new file with mode: 0644]
lib/librte_pmd_bond/rte_eth_bond_api.c
lib/librte_pmd_bond/rte_eth_bond_args.c
lib/librte_pmd_bond/rte_eth_bond_pmd.c
lib/librte_pmd_bond/rte_eth_bond_private.h

index 187608d..7e7d22c 100644 (file)
@@ -328,6 +328,7 @@ struct vxlan_hdr {
 #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
 #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
 #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
 
 #define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
 /**< VXLAN tunnel header length. */
index d4e10bf..cdff126 100644 (file)
@@ -45,6 +45,7 @@ CFLAGS += $(WERROR_FLAGS)
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
 
 ifeq ($(CONFIG_RTE_MBUF_REFCNT),n)
 $(info WARNING: Link Bonding Broadcast mode is disabled because it needs MBUF_REFCNT.)
@@ -54,6 +55,7 @@ endif
 # Export include files
 #
 SYMLINK-y-include += rte_eth_bond.h
+SYMLINK-y-include += rte_eth_bond_8023ad.h
 
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_mbuf
index 085500b..9101f64 100644 (file)
@@ -77,6 +77,25 @@ extern "C" {
  * In this mode all transmitted packets will be transmitted on all available
  * active slaves of the bonded. */
 #endif
+#define BONDING_MODE_8023AD                            (4)
+/**< 802.3AD (Mode 4).
+ *
+ * This mode provides auto negotiation/configuration
+ * of peers as well as link status change monitoring using out of band
+ * LACP (link aggregation control protocol) messages. For further details of
+ * LACP specification see the IEEE 802.3ad/802.1AX standards. It is also
+ * described here
+ * https://www.kernel.org/doc/Documentation/networking/bonding.txt.
+ *
+ * Important Usage Notes:
+ * - for LACP mode to work the rx/tx burst functions must be invoked
+ * at least once every 100ms, otherwise the out-of-band LACP messages will not
+ * be handled with the expected latency and this may cause the link status to be
+ * incorrectly marked as down or failure to correctly negotiate with peers.
+ * - For optimal performance during initial handshaking the array of mbufs provided
+ * to rx_burst should be at least 2 times the slave count size.
+ *
+ */
 /* Balance Mode Transmit Policies */
 #define BALANCE_XMIT_POLICY_LAYER2             (0)
 /**< Layer 2 (Ethernet MAC) */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
new file mode 100644 (file)
index 0000000..f1cf81a
--- /dev/null
@@ -0,0 +1,1216 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_cycles.h>
+
+#include "rte_eth_bond_private.h"
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
+#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
+                       bond_dbg_get_time_diff_ms(), slave_id, \
+                       __func__, ##__VA_ARGS__)
+
+static uint64_t start_time;
+
+/* Milliseconds elapsed since this function was first called. The first call
+ * latches the current TSC value in start_time; later calls convert the TSC
+ * delta to milliseconds using the TSC frequency. */
+static unsigned
+bond_dbg_get_time_diff_ms(void)
+{
+       const uint64_t tsc_now = rte_rdtsc();
+       uint64_t elapsed_cycles;
+
+       if (start_time == 0)
+               start_time = tsc_now;
+
+       elapsed_cycles = tsc_now - start_time;
+       return (elapsed_cycles * 1000) / rte_get_tsc_hz();
+}
+
+/* Debug helper: log every field of the given LACPDU at DEBUG level.
+ *
+ * Actor and partner system addresses are rendered as colon-separated hex and
+ * the two state bytes are expanded into space-separated flag labels. All other
+ * fields are printed exactly as stored in the structure (no byte-order
+ * conversion is applied here). Only built when RTE_LIBRTE_BOND_DEBUG_8023AD is
+ * defined. */
+static void
+bond_print_lacp(struct lacpdu *l)
+{
+       char a_address[18];
+       char p_address[18];
+       char a_state[256] = { 0 };
+       char p_state[256] = { 0 };
+
+       /* state_labels[i] is the label printed when bit i of a state byte is set */
+       static const char * const state_labels[] = {
+               "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
+       };
+
+       int a_len = 0;
+       int p_len = 0;
+       uint8_t i;
+       uint8_t *addr;
+
+       addr = l->actor.port_params.system.addr_bytes;
+       snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
+               addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+       addr = l->partner.port_params.system.addr_bytes;
+       snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
+               addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+       /* Append "<label> " to the actor/partner state string for every set bit */
+       for (i = 0; i < 8; i++) {
+               if ((l->actor.state >> i) & 1) {
+                       a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
+                               state_labels[i]);
+               }
+
+               if ((l->partner.state >> i) & 1) {
+                       p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
+                               state_labels[i]);
+               }
+       }
+
+       /* Strip the trailing separator left behind by the loop above */
+       if (a_len && a_state[a_len-1] == ' ')
+               a_state[a_len-1] = '\0';
+
+       if (p_len && p_state[p_len-1] == ' ')
+               p_state[p_len-1] = '\0';
+
+       RTE_LOG(DEBUG, PMD, "LACP: {\n"\
+                       "  subtype= %02X\n"\
+                       "  ver_num=%02X\n"\
+                       "  actor={ tlv=%02X, len=%02X\n"\
+                       "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
+                       "       state={ %s }\n"\
+                       "  }\n"\
+                       "  partner={ tlv=%02X, len=%02X\n"\
+                       "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
+                       "       state={ %s }\n"\
+                       "  }\n"\
+                       "  collector={info=%02X, length=%02X, max_delay=%04X\n, " \
+                                                       "type_term=%02X, terminator_length = %02X}\n",\
+                       l->subtype,\
+                       l->version_number,\
+                       l->actor.tlv_type_info,\
+                       l->actor.info_length,\
+                       l->actor.port_params.system_priority,\
+                       a_address,\
+                       l->actor.port_params.key,\
+                       l->actor.port_params.port_priority,\
+                       l->actor.port_params.port_number,\
+                       a_state,\
+                       l->partner.tlv_type_info,\
+                       l->partner.info_length,\
+                       l->partner.port_params.system_priority,\
+                       p_address,\
+                       l->partner.port_params.key,\
+                       l->partner.port_params.port_priority,\
+                       l->partner.port_params.port_number,\
+                       p_state,\
+                       l->tlv_type_collector_info,\
+                       l->collector_info_length,\
+                       l->collector_max_delay,\
+                       l->tlv_type_terminator,\
+                       l->terminator_length);
+
+}
+#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
+#else
+#define BOND_PRINT_LACP(lacpdu) do { } while (0)
+#define MODE4_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+/* Destination MAC address written into every LACP frame built by tx_machine */
+static const struct ether_addr lacp_mac_addr = {
+       .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
+};
+
+/* Mode 4 per-port state, indexed by slave port id */
+struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
+
+/* Put the timer into the stopped state, which is encoded as the value 0. */
+static void
+timer_cancel(uint64_t *timer)
+{
+       const uint64_t stopped = 0;
+
+       *timer = stopped;
+}
+
+/* Arm the timer so that it expires `timeout` TSC cycles after the current
+ * TSC reading. */
+static void
+timer_set(uint64_t *timer, uint64_t timeout)
+{
+       const uint64_t now = rte_rdtsc();
+
+       *timer = now + timeout;
+}
+
+/* Forces the given timer into the expired state by stamping it with the
+ * current TSC value, so it compares as elapsed once the TSC has advanced. */
+static void
+timer_force_expired(uint64_t *timer)
+{
+       const uint64_t now = rte_rdtsc();
+
+       *timer = now;
+}
+
+/* A timer holding the value 0 is considered stopped. */
+static bool
+timer_is_stopped(uint64_t *timer)
+{
+       const uint64_t deadline = *timer;
+
+       return deadline == 0;
+}
+
+/* A timer is expired once the TSC has moved past its deadline. Note that a
+ * stopped timer (value 0) therefore also reports as expired. */
+static bool
+timer_is_expired(uint64_t *timer)
+{
+       return rte_rdtsc() > *timer;
+}
+
+/* A timer is running when it has been armed (not stopped) and its deadline
+ * has not yet passed (not expired). */
+static bool
+timer_is_running(uint64_t *timer)
+{
+       return !(timer_is_stopped(timer) || timer_is_expired(timer));
+}
+
+/* Atomically OR the given warning flags into the port's pending warnings.
+ * The read-modify-write is retried until the compare-and-set succeeds, i.e.
+ * until warnings_to_show was not changed by anyone else in between. */
+static void
+set_warning_flags(struct port *port, uint16_t flags)
+{
+       uint16_t seen;
+       uint16_t merged;
+
+       for (;;) {
+               seen = port->warnings_to_show;
+               merged = seen | flags;
+
+               if (unlikely(rte_atomic16_cmpset(&port->warnings_to_show, seen,
+                               merged) == 0))
+                       continue;
+
+               break;
+       }
+}
+
+/**
+ * Log and clear the warnings accumulated for a slave.
+ *
+ * The pending warning flags are atomically fetched and reset to zero. They
+ * are printed only if the port's warning timer has expired, which limits the
+ * output to one batch per BOND_8023AD_WARNINGS_PERIOD_MS; flags fetched while
+ * the timer has not yet expired are discarded.
+ *
+ * @param slave_id             Port id of the slave whose warnings are shown.
+ */
+static void
+show_warnings(uint8_t slave_id)
+{
+       struct port *port = &mode_8023ad_ports[slave_id];
+       /* Same width as port->warnings_to_show: a narrower type would truncate
+        * high flag bits and the compare-and-set below could never succeed. */
+       uint16_t warnings;
+
+       do {
+               warnings = port->warnings_to_show;
+       } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
+
+       if (!warnings)
+               return;
+
+       if (!timer_is_expired(&port->warning_timer))
+               return;
+
+       /* Re-arm the rate limiter (period converted from ms to TSC cycles) */
+       timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
+                       rte_get_tsc_hz() / 1000);
+
+       if (warnings & WRN_RX_QUEUE_FULL) {
+               RTE_LOG(DEBUG, PMD,
+                       "Slave %u: failed to enqueue LACP packet into RX ring.\n"
+                       "Receive and transmit functions must be invoked on bonded\n"
+                       "interface at least 10 times per second or LACP will not\n"
+                       "work correctly\n", slave_id);
+       }
+
+       if (warnings & WRN_TX_QUEUE_FULL) {
+               RTE_LOG(DEBUG, PMD,
+                       "Slave %u: failed to enqueue LACP packet into TX ring.\n"
+                       "Receive and transmit functions must be invoked on bonded\n"
+                       "interface at least 10 times per second or LACP will not\n"
+                       "work correctly\n", slave_id);
+       }
+
+       if (warnings & WRN_RX_MARKER_TO_FAST)
+               RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id);
+
+       /* RTE_LOG does not append a newline, so each message terminates itself */
+       if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
+               RTE_LOG(INFO, PMD,
+                       "Slave %u: ignoring unknown slow protocol frame type\n", slave_id);
+       }
+
+       if (warnings & WRN_UNKNOWN_MARKER_TYPE)
+               RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type\n", slave_id);
+
+       if (warnings & WRN_NOT_LACP_CAPABLE)
+               MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
+}
+
+/* Reset the recorded partner state to the default value and flag the actor
+ * as operating with defaulted partner information. */
+static void
+record_default(struct port *port)
+{
+       /* Record default parameters for partner. Partner admin parameters
+        * are not implemented so set them to arbitrary default (last known) and
+        * mark actor that partner is in defaulted state. */
+       port->partner_state = STATE_LACP_ACTIVE;
+       ACTOR_STATE_SET(port, DEFAULTED);
+}
+
+/** Function handles rx state machine.
+ *
+ * This function implements Receive State Machine from point 5.4.12 in
+ * 802.1AX documentation. It should be called periodically.
+ *
+ * @param internals            Bonded device private data; supplies the mode 4
+ *                             short and long timeout values.
+ * @param slave_id             Port id of the slave being processed; used as
+ *                             index into mode_8023ad_ports.
+ * @param lacp                 LACPDU received on this slave, or NULL when
+ *                             there is no LACPDU to process in this call.
+ */
+static void
+rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
+               struct lacpdu *lacp)
+{
+       struct port *agg, *port = &mode_8023ad_ports[slave_id];
+       uint64_t timeout;
+
+       if (SM_FLAG(port, BEGIN)) {
+               /* Initialize stuff */
+               MODE4_DEBUG("-> INITIALIZE\n");
+               SM_FLAG_CLR(port, MOVED);
+               port->selected = UNSELECTED;
+
+               record_default(port);
+
+               ACTOR_STATE_CLR(port, EXPIRED);
+               timer_cancel(&port->current_while_timer);
+
+               /* DISABLED: On initialization partner is out of sync */
+               PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+               /* LACP DISABLED stuff if LACP not enabled on this port */
+               if (!SM_FLAG(port, LACP_ENABLED))
+                       PARTNER_STATE_CLR(port, AGGREGATION);
+               else
+                       PARTNER_STATE_SET(port, AGGREGATION);
+       }
+
+       if (!SM_FLAG(port, LACP_ENABLED)) {
+               /* Update parameters only if state changed */
+               if (!timer_is_stopped(&port->current_while_timer)) {
+                       port->selected = UNSELECTED;
+                       record_default(port);
+                       PARTNER_STATE_CLR(port, AGGREGATION);
+                       ACTOR_STATE_CLR(port, EXPIRED);
+                       timer_cancel(&port->current_while_timer);
+               }
+               return;
+       }
+
+       if (lacp) {
+               MODE4_DEBUG("LACP -> CURRENT\n");
+               BOND_PRINT_LACP(lacp);
+               /* Update selected flag. If partner parameters are defaulted assume
+                * they match. If not defaulted compare LACP actor with this port's
+                * partner params. */
+               if (!ACTOR_STATE(port, DEFAULTED) &&
+                       (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
+                       || memcmp(&port->partner, &lacp->actor.port_params,
+                               sizeof(port->partner)) != 0)) {
+                       MODE4_DEBUG("selected <- UNSELECTED\n");
+                       port->selected = UNSELECTED;
+               }
+
+               /* Record this PDU actor params as partner params */
+               memcpy(&port->partner, &lacp->actor.port_params,
+                       sizeof(struct port_params));
+               port->partner_state = lacp->actor.state;
+
+               /* Partner parameters are not defaulted any more */
+               ACTOR_STATE_CLR(port, DEFAULTED);
+
+               /* Check whether the partner's view of us (the PDU's partner fields)
+                * matches this port's actor params. The system address is taken
+                * from the aggregator this port is assigned to. */
+               agg = &mode_8023ad_ports[port->aggregator_port_id];
+               bool match = port->actor.system_priority ==
+                       lacp->partner.port_params.system_priority &&
+                       is_same_ether_addr(&agg->actor.system,
+                       &lacp->partner.port_params.system) &&
+                       port->actor.port_priority ==
+                       lacp->partner.port_params.port_priority &&
+                       port->actor.port_number ==
+                       lacp->partner.port_params.port_number;
+
+               /* Update NTT if partners information are outdated (xored and masked
+                * bits are set)*/
+               uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
+                       STATE_SYNCHRONIZATION | STATE_AGGREGATION;
+
+               if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
+                               match == false) {
+                       SM_FLAG_SET(port, NTT);
+               }
+
+               /* If LACP partner params match this port actor params */
+               if (match == true && ACTOR_STATE(port, AGGREGATION) ==
+                               PARTNER_STATE(port,     AGGREGATION))
+                       PARTNER_STATE_SET(port, SYNCHRONIZATION);
+               else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
+                               AGGREGATION))
+                       PARTNER_STATE_SET(port, SYNCHRONIZATION);
+               else
+                       PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+               /* Restart the current-while timer using the timeout that matches
+                * the actor's own LACP timeout setting */
+               if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
+                       timeout = internals->mode4.short_timeout;
+               else
+                       timeout = internals->mode4.long_timeout;
+
+               timer_set(&port->current_while_timer, timeout);
+               ACTOR_STATE_CLR(port, EXPIRED);
+               return; /* No state change */
+       }
+
+       /* If CURRENT state timer is not running (stopped or expired)
+        * transit to EXPIRED state from DISABLED or CURRENT */
+       if (!timer_is_running(&port->current_while_timer)) {
+               ACTOR_STATE_SET(port, EXPIRED);
+               PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+               PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
+               timer_set(&port->current_while_timer, internals->mode4.short_timeout);
+       }
+}
+
+/**
+ * Function handles periodic tx state machine.
+ *
+ * Function implements Periodic Transmission state machine from point 5.4.13
+ * in 802.1AX documentation. It should be called periodically.
+ *
+ * While the machine is in NO_PERIODIC (BEGIN set, LACP disabled, or neither
+ * actor nor partner LACP-active) the periodic timer is stopped and the tx
+ * machine timer is forced to expired. Otherwise NTT is raised when the
+ * periodic timer expires or when the partner switches from long to short
+ * timeout, and the periodic timer is re-armed with the fast or slow period
+ * matching the partner's current timeout.
+ *
+ * @param internals            Bonded device private data; supplies the fast
+ *                             and slow periodic timeout values.
+ * @param slave_id             Port id of the slave whose state machine is
+ *                             handled; used as index into mode_8023ad_ports.
+ */
+static void
+periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
+{
+       struct port *port = &mode_8023ad_ports[slave_id];
+       /* Calculate if either side is LACP enabled */
+       uint64_t timeout;
+       uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
+               PARTNER_STATE(port, LACP_ACTIVE);
+
+       uint8_t is_partner_fast, was_partner_fast;
+       /* No periodic is on BEGIN, LACP DISABLE or when both sides are passive */
+       if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
+               timer_cancel(&port->periodic_timer);
+               timer_force_expired(&port->tx_machine_timer);
+               SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
+
+               MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
+                       SM_FLAG(port, BEGIN) ? "begind " : "",
+                       SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
+                       active ? "LACP active " : "LACP pasive ");
+               return;
+       }
+
+       /* Partner's timeout as currently recorded vs. as seen on the last pass */
+       is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
+       was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
+
+       /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
+        * Other case: check if timer expire or partners settings changed. */
+       if (!timer_is_stopped(&port->periodic_timer)) {
+               if (timer_is_expired(&port->periodic_timer)) {
+                       SM_FLAG_SET(port, NTT);
+               } else if (is_partner_fast != was_partner_fast) {
+                       /* Partners timeout  was slow and now it is fast -> send LACP.
+                        * In other case (was fast and now it is slow) just switch
+                        * timeout to slow without forcing send of LACP (because standard
+                        * say so)*/
+                       if (is_partner_fast)
+                               SM_FLAG_SET(port, NTT);
+               } else
+                       return; /* Nothing changed */
+       }
+
+       /* Handle state transition to FAST/SLOW LACP timeout */
+       if (is_partner_fast) {
+               timeout = internals->mode4.fast_periodic_timeout;
+               SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
+       } else {
+               timeout = internals->mode4.slow_periodic_timeout;
+               SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
+       }
+
+       timer_set(&port->periodic_timer, timeout);
+}
+
+/**
+ * Function handles mux state machine.
+ *
+ * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
+ * It should be called periodically.
+ *
+ * The current mux state is not stored explicitly; it is derived from the
+ * wait_while_timer and the actor's SYNCHRONIZATION/COLLECTING/DISTRIBUTING
+ * state bits.
+ *
+ * @param internals            Bonded device private data; supplies the
+ *                             aggregate wait timeout and the bonded port id
+ *                             used in log messages.
+ * @param slave_id             Port id of the slave whose state machine is
+ *                             handled; used as index into mode_8023ad_ports.
+ */
+static void
+mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
+{
+       struct port *port = &mode_8023ad_ports[slave_id];
+
+       /* Actor state bits that are cleared when the mux is detached */
+       const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+               STATE_COLLECTING;
+
+       /* Enter DETACHED state on BEGIN condition or from any other state if
+        * port was unselected */
+       if (SM_FLAG(port, BEGIN) ||
+                       port->selected == UNSELECTED || (port->selected == STANDBY &&
+                               (port->actor_state & state_mask) != 0)) {
+               /* detach mux from aggregator */
+               port->actor_state &= ~state_mask;
+               /* Set ntt to true if BEGIN condition or transition from any other state
+                * which is indicated that wait_while_timer was started */
+               if (SM_FLAG(port, BEGIN) ||
+                               !timer_is_stopped(&port->wait_while_timer)) {
+                       SM_FLAG_SET(port, NTT);
+                       MODE4_DEBUG("-> DETACHED\n");
+               }
+               timer_cancel(&port->wait_while_timer);
+       }
+
+       if (timer_is_stopped(&port->wait_while_timer)) {
+               if (port->selected == SELECTED || port->selected == STANDBY) {
+                       timer_set(&port->wait_while_timer,
+                               internals->mode4.aggregate_wait_timeout);
+
+                       MODE4_DEBUG("DETACHED -> WAITING\n");
+               }
+               /* Waiting state entered */
+               return;
+       }
+
+       /* Transit next state if port is ready */
+       if (!timer_is_expired(&port->wait_while_timer))
+               return;
+
+       if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
+               !PARTNER_STATE(port, SYNCHRONIZATION)) {
+               /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
+                * sync transit to ATTACHED state.  */
+               ACTOR_STATE_CLR(port, DISTRIBUTING);
+               ACTOR_STATE_CLR(port, COLLECTING);
+               /* Clear actor sync to activate transit ATTACHED in condition below */
+               ACTOR_STATE_CLR(port, SYNCHRONIZATION);
+               MODE4_DEBUG("Out of sync -> ATTACHED\n");
+       }
+
+       if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
+               /* attach mux to aggregator */
+               RTE_VERIFY((port->actor_state & (STATE_COLLECTING |
+                       STATE_DISTRIBUTING)) == 0);
+
+               ACTOR_STATE_SET(port, SYNCHRONIZATION);
+               SM_FLAG_SET(port, NTT);
+               MODE4_DEBUG("ATTACHED Entered\n");
+       } else if (!ACTOR_STATE(port, COLLECTING)) {
+               /* Start collecting if in sync */
+               if (PARTNER_STATE(port, SYNCHRONIZATION)) {
+                       MODE4_DEBUG("ATTACHED -> COLLECTING\n");
+                       ACTOR_STATE_SET(port, COLLECTING);
+                       SM_FLAG_SET(port, NTT);
+               }
+       } else if (ACTOR_STATE(port, COLLECTING)) {
+               /* Check if partner is in COLLECTING state. If so this port can
+                * distribute frames to it */
+               if (!ACTOR_STATE(port, DISTRIBUTING)) {
+                       if (PARTNER_STATE(port, COLLECTING)) {
+                               /* Enable  DISTRIBUTING if partner is collecting */
+                               ACTOR_STATE_SET(port, DISTRIBUTING);
+                               SM_FLAG_SET(port, NTT);
+                               MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
+                               RTE_LOG(INFO, PMD,
+                                       "Bond %u: slave id %u distributing started.\n",
+                                       internals->port_id, slave_id);
+                       }
+               } else {
+                       if (!PARTNER_STATE(port, COLLECTING)) {
+                               /* Disable DISTRIBUTING (enter COLLECTING state) if partner
+                                * is not collecting */
+                               ACTOR_STATE_CLR(port, DISTRIBUTING);
+                               SM_FLAG_SET(port, NTT);
+                               MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
+                               RTE_LOG(INFO, PMD,
+                                       "Bond %u: slave id %u distributing stopped.\n",
+                                       internals->port_id, slave_id);
+                       }
+               }
+       }
+}
+
+/**
+ * Function handles transmit state machine.
+ *
+ * Function implements Transmit Machine from point 5.4.16 in 802.1AX
+ * documentation.
+ *
+ * When NTT is set and the tx machine timer has expired, a LACPDU describing
+ * the port's current actor and partner information is built in a freshly
+ * allocated mbuf and enqueued on the port's tx ring. On success NTT is
+ * cleared and the tx machine timer is re-armed; on failure NTT stays set so
+ * the frame is rebuilt on a later call.
+ *
+ * @param internals            Bonded device private data; supplies the tx
+ *                             period timeout.
+ * @param slave_id             Port id of the slave to transmit on; used as
+ *                             index into mode_8023ad_ports and as the source
+ *                             of the frame's source MAC address.
+ */
+static void
+tx_machine(struct bond_dev_private *internals, uint8_t slave_id)
+{
+       struct port *agg, *port = &mode_8023ad_ports[slave_id];
+
+       struct rte_mbuf *lacp_pkt = NULL;
+       struct lacpdu_header *hdr;
+       struct lacpdu *lacpdu;
+
+       /* If periodic timer is not running periodic machine is in NO PERIODIC and
+        * according to 802.1AX standard tx machine should not transmit any frames
+        * and set ntt to false. */
+       if (timer_is_stopped(&port->periodic_timer))
+               SM_FLAG_CLR(port, NTT);
+
+       if (!SM_FLAG(port, NTT))
+               return;
+
+       /* Rate limit: wait until the previous tx period has elapsed */
+       if (!timer_is_expired(&port->tx_machine_timer))
+               return;
+
+       lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
+       if (lacp_pkt == NULL) {
+               RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
+               return;
+       }
+
+       lacp_pkt->data_len = sizeof(*hdr);
+       lacp_pkt->pkt_len = sizeof(*hdr);
+
+       hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
+
+       /* Source and destination MAC */
+       ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
+       rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
+       hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
+
+       lacpdu = &hdr->lacpdu;
+       memset(lacpdu, 0, sizeof(*lacpdu));
+
+       /* Initialize LACP part */
+       lacpdu->subtype = SLOW_SUBTYPE_LACP;
+       lacpdu->version_number = 1;
+
+       /* ACTOR: port's own params, with the system address replaced by that of
+        * the aggregator the port is assigned to */
+       lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
+       lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
+       memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
+                       sizeof(port->actor));
+       agg = &mode_8023ad_ports[port->aggregator_port_id];
+       ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
+       lacpdu->actor.state = port->actor_state;
+
+       /* PARTNER */
+       lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
+       lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
+       memcpy(&lacpdu->partner.port_params, &port->partner,
+                       sizeof(struct port_params));
+       lacpdu->partner.state = port->partner_state;
+
+       /* Other fields */
+       lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
+       lacpdu->collector_info_length = 0x10;
+       lacpdu->collector_max_delay = 0;
+
+       lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
+       lacpdu->terminator_length = 0;
+
+       if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
+               /* If TX ring full, drop packet and free message. Retransmission
+                * will happen in next function call. */
+               rte_pktmbuf_free(lacp_pkt);
+               set_warning_flags(port, WRN_TX_QUEUE_FULL);
+               return;
+       }
+
+       MODE4_DEBUG("sending LACP frame\n");
+       BOND_PRINT_LACP(lacpdu);
+
+       timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
+       SM_FLAG_CLR(port, NTT);
+}
+
+/**
+ * Function assigns port to aggregator.
+ *
+ * The active slaves are scanned in order for the first port that is its own
+ * aggregator and whose actor key and partner (system priority, system address,
+ * key) match those of the given port. A candidate is accepted only if the
+ * port's partner system address is non-zero and the candidate's actor key has
+ * the full-duplex bit set. If none is found the port aggregates on itself.
+ * The port is always left in the SELECTED state.
+ *
+ * @param internals            Pointer to bond_dev_private structure.
+ * @param slave_id             Port id of the slave to assign.
+ */
+static void
+selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
+{
+       struct port *agg, *port;
+       uint8_t slaves_count, new_agg_id, i;
+       uint8_t *slaves;
+
+       port = &mode_8023ad_ports[slave_id];
+       slaves_count = internals->active_slave_count;
+       slaves = internals->active_slaves;
+
+       /* Walk the active slaves; leave the loop early on the first suitable
+        * aggregator, otherwise fall off the end with i == slaves_count */
+       for (i = 0; i < slaves_count; ++i) {
+               agg = &mode_8023ad_ports[slaves[i]];
+
+               /* Only ports that aggregate on themselves are aggregators */
+               if (agg->aggregator_port_id != slaves[i])
+                       continue;
+
+               /* Actor system ID needs no check: every slave device shares the
+                * same ID (MAC address). */
+               if (agg->actor.key != port->actor.key)
+                       continue;
+
+               if (agg->partner.system_priority != port->partner.system_priority)
+                       continue;
+
+               if (is_same_ether_addr(&agg->partner.system,
+                               &port->partner.system) != 1)
+                       continue;
+
+               if (agg->partner.key != port->partner.key)
+                       continue;
+
+               /* A partner without a system address cannot be matched */
+               if (is_zero_ether_addr(&port->partner.system) == 1)
+                       continue;
+
+               /* Aggregator must be running full duplex */
+               if ((agg->actor.key &
+                               rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0)
+                       continue;
+
+               break;
+       }
+
+       /* No match: the port becomes its own aggregator */
+       new_agg_id = (i == slaves_count) ? slave_id : slaves[i];
+
+       if (port->aggregator_port_id != new_agg_id) {
+               port->aggregator_port_id = new_agg_id;
+
+               MODE4_DEBUG("-> SELECTED: ID=%3u\n"
+                       "\t%s aggregator ID=%3u\n",
+                       port->aggregator_port_id,
+                       port->aggregator_port_id == slave_id ?
+                               "aggregator not found, using default" : "aggregator found",
+                       port->aggregator_port_id);
+       }
+
+       port->selected = SELECTED;
+}
+
+/* Translate a DPDK link speed value into the bonding speed code stored in the
+ * key field. Speeds without a dedicated code map to 0xFFFF. */
+static uint16_t
+link_speed_key(uint16_t speed) {
+       switch (speed) {
+       case ETH_LINK_SPEED_AUTONEG:
+               return 0x00;
+       case ETH_LINK_SPEED_10:
+               return BOND_LINK_SPEED_KEY_10M;
+       case ETH_LINK_SPEED_100:
+               return BOND_LINK_SPEED_KEY_100M;
+       case ETH_LINK_SPEED_1000:
+               return BOND_LINK_SPEED_KEY_1000M;
+       case ETH_LINK_SPEED_10G:
+               return BOND_LINK_SPEED_KEY_10G;
+       case ETH_LINK_SPEED_20G:
+               return BOND_LINK_SPEED_KEY_20G;
+       case ETH_LINK_SPEED_40G:
+               return BOND_LINK_SPEED_KEY_40G;
+       default:
+               break;
+       }
+
+       /* Unknown speed */
+       return 0xFFFF;
+}
+
+static void
+bond_mode_8023ad_periodic_cb(void *arg)
+{
+       struct rte_eth_dev *bond_dev = arg;
+       struct bond_dev_private *internals = bond_dev->data->dev_private;
+       struct port *port;
+       struct rte_eth_link link_info;
+       struct ether_addr slave_addr;
+
+       void *pkt = NULL;
+       uint16_t i, slave_id;
+
+       /* Mode 4 periodic tick. arg is the bonded rte_eth_dev; the callback
+        * re-arms itself with rte_eal_alarm_set() before returning. */
+       /* Pass 1: refresh actor key and system address of each active slave */
+       for (i = 0; i < internals->active_slave_count; i++) {
+               uint16_t key;
+
+               slave_id = internals->active_slaves[i];
+               rte_eth_link_get(slave_id, &link_info);
+               rte_eth_macaddr_get(slave_id, &slave_addr);
+
+               if (link_info.link_status != 0) {
+                       key = link_speed_key(link_info.link_speed) << 1; /* speed code above the duplex bit */
+                       if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
+                               key |= BOND_LINK_FULL_DUPLEX_KEY;
+               } else
+                       key = 0; /* link down */
+
+               port = &mode_8023ad_ports[slave_id];
+
+               key = rte_cpu_to_be_16(key); /* actor.key is kept big endian */
+               if (key != port->actor.key) {
+                       if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
+                               set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
+
+                       port->actor.key = key;
+                       SM_FLAG_SET(port, NTT); /* key changed: request transmission */
+               }
+
+               if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
+                       ether_addr_copy(&slave_addr, &port->actor.system);
+                       if (port->aggregator_port_id == slave_id) /* only when port is its own aggregator */
+                               SM_FLAG_SET(port, NTT);
+               }
+       }
+       /* Pass 2: run the state machines of each active slave */
+       for (i = 0; i < internals->active_slave_count; i++) {
+               slave_id = internals->active_slaves[i];
+               port = &mode_8023ad_ports[slave_id];
+
+               if ((port->actor.key &
+                               rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
+
+                       SM_FLAG_SET(port, BEGIN);
+
+                       /* LACP is disabled on half duplex or when link is down */
+                       if (SM_FLAG(port, LACP_ENABLED)) {
+                               /* Port was enabled: disable LACP on it and stop
+                                * collecting/distributing */
+                               SM_FLAG_CLR(port, LACP_ENABLED);
+                               ACTOR_STATE_CLR(port, DISTRIBUTING);
+                               ACTOR_STATE_CLR(port, COLLECTING);
+                       }
+
+                       /* Skip state machine processing for this port */
+                       continue;
+               }
+
+               SM_FLAG_SET(port, LACP_ENABLED);
+
+               /* Take at most one queued LACP frame for this port per tick. The
+                * subtype was already checked by the function that queued the
+                * packet; it is only re-verified below. */
+               if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
+                       struct rte_mbuf *lacp_pkt = pkt;
+                       struct lacpdu_header *lacp;
+
+                       lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
+                       RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
+
+                       /* This is a LACP frame so pass it to rx_machine */
+                       rx_machine(internals, slave_id, &lacp->lacpdu);
+                       rte_pktmbuf_free(lacp_pkt);
+               } else
+                       rx_machine(internals, slave_id, NULL); /* nothing received this tick */
+
+               periodic_machine(internals, slave_id);
+               mux_machine(internals, slave_id);
+               tx_machine(internals, slave_id);
+               selection_logic(internals, slave_id);
+
+               SM_FLAG_CLR(port, BEGIN);
+               show_warnings(slave_id);
+       }
+
+       rte_eal_alarm_set(internals->mode4.update_timeout_us,
+                       bond_mode_8023ad_periodic_cb, arg); /* schedule next tick */
+}
+
+void
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
+{
+       struct bond_dev_private *internals = bond_dev->data->dev_private;
+
+       struct port *port = &mode_8023ad_ports[slave_id];
+       struct port_params initial = { /* defaults shared by actor and partner */
+                       .system = { { 0 } },
+                       .system_priority = rte_cpu_to_be_16(0xFFFF),
+                       .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
+                       .port_priority = rte_cpu_to_be_16(0x00FF),
+                       .port_number = 0,
+       };
+
+       char mem_name[RTE_ETH_NAME_MAX_LEN];
+       uint8_t socket_id;
+       unsigned element_size;
+       /* Reset the slave's mode 4 state to its defaults and, the first time
+        * the slave is activated, create its mbuf pool and rx/tx rings. */
+       /* Given slave must not be in active list */
+       RTE_VERIFY(find_slave_by_id(internals->active_slaves,
+       internals->active_slave_count, slave_id) == internals->active_slave_count);
+
+       memcpy(&port->actor, &initial, sizeof(struct port_params));
+       /* Standard requires that port ID must be greater than 0.
+        * Add 1 to get corresponding port_number */
+       port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1);
+
+       memcpy(&port->partner, &initial, sizeof(struct port_params));
+
+       /* default states */
+       port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
+       port->partner_state = STATE_LACP_ACTIVE;
+       port->sm_flags = SM_FLAGS_BEGIN;
+
+       /* use this port as its own aggregator */
+       port->aggregator_port_id = slave_id;
+       rte_eth_promiscuous_enable(slave_id);
+
+       timer_cancel(&port->warning_timer);
+
+       if (port->mbuf_pool != NULL) /* pool and rings survive from an earlier activation */
+               return;
+
+       RTE_VERIFY(port->rx_ring == NULL);
+       RTE_VERIFY(port->tx_ring == NULL);
+       socket_id = rte_eth_devices[slave_id].pci_dev->numa_node;
+
+       element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf)
+                               + RTE_PKTMBUF_HEADROOM;
+
+        /* How big should the memory pool be? If the driver does not free
+         * packets quickly enough there will be ENOMEM in tx_machine.
+         * For now the pool holds BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1
+         * mbufs. Hope it will be enough. */
+       snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
+       port->mbuf_pool = rte_mempool_create(mem_name,
+               BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1,
+               element_size,
+               RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
+               sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
+               NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD);
+
+       /* Any memory allocation failure in initialization is critical because
+        * resources can't be freed, so reinitialization is impossible. */
+       if (port->mbuf_pool == NULL) {
+               rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
+                       slave_id, mem_name, rte_strerror(rte_errno));
+       }
+
+       snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
+       port->rx_ring = rte_ring_create(mem_name,
+                       rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
+
+       if (port->rx_ring == NULL) {
+               rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
+                       mem_name, rte_strerror(rte_errno));
+       }
+
+       /* TX ring is at least one pkt longer to make room for marker packet. */
+       snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
+       port->tx_ring = rte_ring_create(mem_name,
+                       rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
+
+       if (port->tx_ring == NULL) {
+               rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
+                       mem_name, rte_strerror(rte_errno));
+       }
+}
+
+/**
+ * Take an active slave out of mode 4 operation.
+ *
+ * Every active port that aggregates on the given slave is unselected and
+ * falls back to being its own aggregator. The slave itself is unselected,
+ * its SYNCHRONIZATION/DISTRIBUTING/COLLECTING actor state bits are cleared
+ * and all packets still queued on its rx and tx rings are freed.
+ *
+ * @param bond_dev     Bonded ethdev.
+ * @param slave_id     Port id of the slave; must be on the active list.
+ * @return 0 always.
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
+               uint8_t slave_id)
+{
+       struct bond_dev_private *internals = bond_dev->data->dev_private;
+       struct port *port;
+       void *queued = NULL;
+       uint8_t idx;
+
+       /* Given slave must be in active list */
+       RTE_VERIFY(find_slave_by_id(internals->active_slaves,
+       internals->active_slave_count, slave_id) < internals->active_slave_count);
+
+       /* Exclude slave from transmit policy. If this slave is an aggregator,
+        * unselect all ports aggregated on it so that selection logic picks a
+        * suitable aggregator for them again. */
+       for (idx = 0; idx < internals->active_slave_count; idx++) {
+               uint8_t member_id = internals->active_slaves[idx];
+
+               port = &mode_8023ad_ports[member_id];
+               if (port->aggregator_port_id == slave_id) {
+                       port->selected = UNSELECTED;
+                       /* Use default aggregator */
+                       port->aggregator_port_id = member_id;
+               }
+       }
+
+       port = &mode_8023ad_ports[slave_id];
+       port->selected = UNSELECTED;
+       port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+                       STATE_COLLECTING);
+
+       /* Drop whatever is still queued for or from the state machines. */
+       while (rte_ring_dequeue(port->rx_ring, &queued) == 0)
+               rte_pktmbuf_free((struct rte_mbuf *)queued);
+
+       while (rte_ring_dequeue(port->tx_ring, &queued) == 0)
+               rte_pktmbuf_free((struct rte_mbuf *)queued);
+
+       return 0;
+}
+
+/**
+ * Propagate changed slave MAC addresses into the mode 4 actor system ids.
+ *
+ * The periodic callback is cancelled while the ports are updated and is
+ * started again afterwards if the bonded device is started.
+ *
+ * @param bond_dev     Bonded ethdev.
+ */
+void
+bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
+{
+       struct bond_dev_private *internals = bond_dev->data->dev_private;
+       struct ether_addr current_addr;
+       struct port *slave, *member;
+       uint8_t slave_id, i, j;
+
+       bond_mode_8023ad_stop(bond_dev);
+
+       for (i = 0; i < internals->active_slave_count; i++) {
+               slave_id = internals->active_slaves[i];
+               slave = &mode_8023ad_ports[slave_id];
+               rte_eth_macaddr_get(slave_id, &current_addr);
+
+               if (!is_same_ether_addr(&current_addr, &slave->actor.system)) {
+                       ether_addr_copy(&current_addr, &slave->actor.system);
+
+                       /* Nothing more to do unless this port is an aggregator;
+                        * if it is, set NTT on every port that uses it. */
+                       if (slave->aggregator_port_id == slave_id) {
+                               for (j = 0; j < internals->active_slave_count; j++) {
+                                       member = &mode_8023ad_ports[internals->active_slaves[j]];
+                                       if (member->aggregator_port_id == slave_id)
+                                               SM_FLAG_SET(member, NTT);
+                               }
+                       }
+               }
+       }
+
+       if (bond_dev->data->dev_started)
+               bond_mode_8023ad_start(bond_dev);
+}
+
+/**
+ * Read back the mode 4 configuration of a bonded device.
+ *
+ * Timeouts kept internally in TSC ticks are converted to milliseconds; the
+ * update timeout is kept in microseconds and converted likewise. Every field
+ * of @p conf is written, so the result can be handed back to
+ * bond_mode_8023ad_setup() unchanged.
+ *
+ * @param dev          Bonded ethdev.
+ * @param conf         Destination structure; must not be NULL.
+ */
+void
+bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
+               struct rte_eth_bond_8023ad_conf *conf)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct mode8023ad_private *mode4 = &internals->mode4;
+       uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
+
+       conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
+       conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
+       conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
+       conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
+       conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
+       conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
+       /* Previously left unset, which made the caller's copy of this field
+        * indeterminate. */
+       conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
+       conf->update_timeout_ms = mode4->update_timeout_us / 1000;
+}
+
+/**
+ * Store a mode 4 configuration in the bonded device.
+ *
+ * Millisecond values are converted to TSC ticks, except the update timeout
+ * which is stored in microseconds. No validation is done here.
+ *
+ * @param dev          Bonded ethdev.
+ * @param conf         Configuration to apply, or NULL for the built-in defaults.
+ */
+void
+bond_mode_8023ad_setup(struct rte_eth_dev *dev,
+               struct rte_eth_bond_8023ad_conf *conf)
+{
+       struct rte_eth_bond_8023ad_conf defaults = {
+                       .fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS,
+                       .slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS,
+                       .short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS,
+                       .long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS,
+                       .aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS,
+                       .tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS,
+                       .rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS,
+                       .update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS,
+       };
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct mode8023ad_private *mode4 = &internals->mode4;
+       uint64_t ticks_per_ms = rte_get_tsc_hz() / 1000;
+
+       if (conf == NULL)
+               conf = &defaults;
+
+       mode4->fast_periodic_timeout = conf->fast_periodic_ms * ticks_per_ms;
+       mode4->slow_periodic_timeout = conf->slow_periodic_ms * ticks_per_ms;
+       mode4->short_timeout = conf->short_timeout_ms * ticks_per_ms;
+       mode4->long_timeout = conf->long_timeout_ms * ticks_per_ms;
+       mode4->aggregate_wait_timeout =
+                       conf->aggregate_wait_timeout_ms * ticks_per_ms;
+       mode4->tx_period_timeout = conf->tx_period_ms * ticks_per_ms;
+       mode4->rx_marker_timeout = conf->rx_marker_period_ms * ticks_per_ms;
+       mode4->update_timeout_us = conf->update_timeout_ms * 1000;
+}
+
+int
+bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
+{
+       struct bond_dev_private *internals = bond_dev->data->dev_private;
+       uint16_t i;
+       /* Calls bond_mode_8023ad_activate_slave() once per active slave.
+        * NOTE(review): the loop position i is passed as the slave id, not
+        * internals->active_slaves[i]; this looks unintended. It cannot simply
+        * be swapped, because bond_mode_8023ad_activate_slave() verifies that
+        * the given slave is NOT on the active list - confirm intended
+        * behaviour before changing. */
+       for (i = 0; i < internals->active_slave_count; i++)
+               bond_mode_8023ad_activate_slave(bond_dev, i);
+
+       return 0; /* always succeeds */
+}
+
+/**
+ * Schedule the first run of the mode 4 periodic callback.
+ *
+ * The first tick is armed with the compile-time default period
+ * (BOND_MODE_8023AX_UPDATE_TIMEOUT_MS).
+ *
+ * @param bond_dev     Bonded ethdev, passed to the callback as its argument.
+ * @return Value returned by rte_eal_alarm_set().
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
+{
+       int ret;
+
+       ret = rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
+                       bond_mode_8023ad_periodic_cb, bond_dev);
+       return ret;
+}
+
+/**
+ * Cancel the pending mode 4 periodic callback of a bonded device.
+ *
+ * @param bond_dev     Bonded ethdev the callback was armed with.
+ */
+void
+bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
+{
+       /* The result of the cancellation is deliberately not inspected. */
+       (void)rte_eal_alarm_cancel(bond_mode_8023ad_periodic_cb, bond_dev);
+}
+
+/**
+ * Dispatch a received slow protocol frame.
+ *
+ * Marker information frames are turned into marker responses in place and
+ * queued on the slave's tx ring, rate limited by the rx marker timer. LACP
+ * frames are queued on the slave's rx ring for the periodic callback. Any
+ * other frame, or a frame that cannot be queued, is freed and a warning flag
+ * is recorded on the port. The packet is consumed in every case.
+ *
+ * @param internals    Bonded device private data.
+ * @param slave_id     Port id of the slave the frame arrived on.
+ * @param pkt          Received slow protocol frame.
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+       uint8_t slave_id, struct rte_mbuf *pkt)
+{
+       struct mode8023ad_private *mode4 = &internals->mode4;
+       struct port *port = &mode_8023ad_ports[slave_id];
+       struct marker_header *m_hdr;
+       uint64_t marker_timer, old_marker_timer;
+       int retval;
+       uint8_t wrn, subtype;
+       /* If packet is a marker, we send response now by reusing given packet
+        * and update only source MAC, destination MAC is multicast so don't
+        * update it. Other frames will be handled later by state machines */
+       subtype = rte_pktmbuf_mtod(pkt,
+                       struct slow_protocol_frame *)->slow_protocol.subtype;
+
+       if (subtype == SLOW_SUBTYPE_MARKER) {
+               m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
+
+               /* Anything but a marker information TLV is the error path, so
+                * hint it as the unlikely branch. */
+               if (unlikely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
+                       wrn = WRN_UNKNOWN_MARKER_TYPE;
+                       goto free_out;
+               }
+
+               /* Set up marker timer. Do it in a loop in case of concurrent
+                * access. */
+               do {
+                       old_marker_timer = port->rx_marker_timer;
+                       if (!timer_is_expired(&old_marker_timer)) {
+                               /* Markers arrive faster than the configured period */
+                               wrn = WRN_RX_MARKER_TO_FAST;
+                               goto free_out;
+                       }
+
+                       timer_set(&marker_timer, mode4->rx_marker_timeout);
+                       retval = rte_atomic64_cmpset(&port->rx_marker_timer,
+                               old_marker_timer, marker_timer);
+               } while (unlikely(retval == 0));
+
+               m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
+               rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
+
+               if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
+                       /* reset timer */
+                       port->rx_marker_timer = 0;
+                       wrn = WRN_TX_QUEUE_FULL;
+                       goto free_out;
+               }
+       } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
+               if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
+                       /* If RX ring is full, free the LACPDU and drop packet */
+                       wrn = WRN_RX_QUEUE_FULL;
+                       goto free_out;
+               }
+       } else {
+               wrn = WRN_UNKNOWN_SLOW_TYPE;
+               goto free_out;
+       }
+
+       return;
+
+free_out:
+       set_warning_flags(port, wrn);
+       rte_pktmbuf_free(pkt);
+}
+
+/* Public wrapper: validate the arguments, then read the mode 4 configuration
+ * of the bonded device into conf. Returns 0 on success, -EINVAL when port_id
+ * is not a valid bonded port id or conf is NULL. */
+int
+rte_eth_bond_8023ad_conf_get(uint8_t port_id,
+               struct rte_eth_bond_8023ad_conf *conf)
+{
+       if (valid_bonded_port_id(port_id) != 0 || conf == NULL)
+               return -EINVAL;
+
+       bond_mode_8023ad_conf_get(&rte_eth_devices[port_id], conf);
+       return 0;
+}
+
+/* Public wrapper: validate port_id and (when given) the configuration, then
+ * apply it to the bonded device. A NULL conf selects the default
+ * configuration. Returns 0 on success, -EINVAL on an invalid port id or
+ * configuration. */
+int
+rte_eth_bond_8023ad_setup(uint8_t port_id,
+               struct rte_eth_bond_8023ad_conf *conf)
+{
+       int conf_invalid;
+
+       if (valid_bonded_port_id(port_id) != 0)
+               return -EINVAL;
+
+       /* Basic sanity check, skipped when defaults are requested */
+       conf_invalid = conf != NULL && (
+                       conf->slow_periodic_ms == 0 ||
+                       conf->fast_periodic_ms >= conf->slow_periodic_ms ||
+                       conf->long_timeout_ms == 0 ||
+                       conf->short_timeout_ms >= conf->long_timeout_ms ||
+                       conf->aggregate_wait_timeout_ms == 0 ||
+                       conf->tx_period_ms == 0 ||
+                       conf->rx_marker_period_ms == 0 ||
+                       conf->update_timeout_ms == 0);
+
+       if (conf_invalid) {
+               RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
+               return -EINVAL;
+       }
+
+       bond_mode_8023ad_setup(&rte_eth_devices[port_id], conf);
+
+       return 0;
+}
+
+/* Copy the current mode 4 state of an active slave into info. Returns 0 on
+ * success, -EINVAL when info is NULL, port_id is not a valid bonded port in
+ * 802.3ad mode, or slave_id is not on that device's active slave list. */
+int
+rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
+               struct rte_eth_bond_8023ad_slave_info *info)
+{
+       struct bond_dev_private *internals;
+       struct port *src;
+
+       if (info == NULL)
+               return -EINVAL;
+       if (valid_bonded_port_id(port_id) != 0)
+               return -EINVAL;
+       if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
+               return -EINVAL;
+
+       internals = rte_eth_devices[port_id].data->dev_private;
+
+       /* Only slaves on the active list have meaningful mode 4 state. */
+       if (find_slave_by_id(internals->active_slaves,
+                       internals->active_slave_count, slave_id) ==
+                               internals->active_slave_count)
+               return -EINVAL;
+
+       src = &mode_8023ad_ports[slave_id];
+
+       info->selected = src->selected;
+       info->agg_port_id = src->aggregator_port_id;
+
+       info->actor_state = src->actor_state;
+       rte_memcpy(&info->actor, &src->actor, sizeof(src->actor));
+
+       info->partner_state = src->partner_state;
+       rte_memcpy(&info->partner, &src->partner, sizeof(src->partner));
+
+       return 0;
+}
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
new file mode 100644 (file)
index 0000000..9adc6aa
--- /dev/null
@@ -0,0 +1,214 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_8023AD_H_
+#define RTE_ETH_BOND_8023AD_H_
+
+#include <rte_ether.h>
+
+/**
+ * Actor/partner states
+ */
+#define STATE_LACP_ACTIVE                   0x01
+#define STATE_LACP_SHORT_TIMEOUT            0x02
+#define STATE_AGGREGATION                   0x04
+#define STATE_SYNCHRONIZATION               0x08
+#define STATE_COLLECTING                    0x10
+#define STATE_DISTRIBUTING                  0x20
+/** Partners parameters are defaulted */
+#define STATE_DEFAULTED                     0x40
+#define STATE_EXPIRED                       0x80
+
+#define TLV_TYPE_ACTOR_INFORMATION          0x01
+#define TLV_TYPE_PARTNER_INFORMATION        0x02
+#define TLV_TYPE_COLLECTOR_INFORMATION      0x03
+#define TLV_TYPE_TERMINATOR_INFORMATION     0x00
+
+#define SLOW_SUBTYPE_LACP                   0x01
+#define SLOW_SUBTYPE_MARKER                 0x02
+
+#define MARKER_TLV_TYPE_INFO                0x01
+#define MARKER_TLV_TYPE_RESP                0x02
+
+enum rte_bond_8023ad_selection { /* aggregator selection state of a slave port */
+       UNSELECTED, /* no aggregator selected; set when a slave is deactivated */
+       STANDBY, /* NOTE(review): presumably the 802.1AX STANDBY state - confirm where it is used */
+       SELECTED /* set by the selection logic once an aggregator is assigned */
+};
+
+/** Generic slow protocol structure: subtype byte followed by an opaque payload */
+struct slow_protocol {
+       uint8_t subtype; /* SLOW_SUBTYPE_LACP or SLOW_SUBTYPE_MARKER for handled frames */
+       uint8_t reserved_119[119]; /* remainder of the PDU, not interpreted at this level */
+} __attribute__((__packed__));
+
+/** Generic slow protocol frame type structure: Ethernet header plus slow protocol PDU */
+struct slow_protocol_frame {
+       struct ether_hdr eth_hdr;
+       struct slow_protocol slow_protocol; /* subtype is read from here to classify the frame */
+} __attribute__((__packed__));
+
+struct port_params { /* actor/partner parameters; the implementation stores the 16-bit fields big endian */
+       uint16_t system_priority;
+       /**< System priority (unused in current implementation) */
+       struct ether_addr system;
+       /**< System ID - Slave MAC address, same as bonding MAC address */
+       uint16_t key;
+       /**< Speed information (implementation dependent) and duplex. */
+       uint16_t port_priority;
+       /**< Priority of this port (unused in current implementation) */
+       uint16_t port_number;
+       /**< Port number. For the actor it is the slave port id plus 1. */
+} __attribute__((__packed__));
+
+struct lacpdu_actor_partner_params { /* actor or partner information TLV of a LACPDU (20 bytes) */
+       uint8_t tlv_type_info; /* presumably TLV_TYPE_ACTOR_INFORMATION or TLV_TYPE_PARTNER_INFORMATION - confirm in tx_machine */
+       uint8_t info_length;
+       struct port_params port_params;
+       uint8_t state; /* presumably a mask of STATE_* bits - confirm against rx/tx machines */
+       uint8_t reserved_3[3];
+} __attribute__((__packed__));
+
+/** LACPDU structure (5.4.2 in 802.1AX documentation). Packed, 110 bytes. */
+struct lacpdu {
+       uint8_t subtype; /* SLOW_SUBTYPE_LACP */
+       uint8_t version_number;
+
+       struct lacpdu_actor_partner_params actor;
+       struct lacpdu_actor_partner_params partner;
+
+       uint8_t tlv_type_collector_info;
+       uint8_t collector_info_length;
+       uint16_t collector_max_delay; /* transmitted as 0 by this implementation */
+       uint8_t reserved_12[12];
+
+       uint8_t tlv_type_terminator; /* TLV_TYPE_TERMINATOR_INFORMATION on transmit */
+       uint8_t terminator_length; /* 0 on transmit */
+       uint8_t reserved_50[50];
+} __attribute__((__packed__));
+
+/** LACPDU frame: Contains Ethernet header and LACPDU. */
+struct lacpdu_header {
+       struct ether_hdr eth_hdr;
+       struct lacpdu lacpdu; /* handed to rx_machine for received frames */
+} __attribute__((__packed__));
+
+struct marker { /* marker PDU (information or response). Packed, 110 bytes. */
+       uint8_t subtype; /* SLOW_SUBTYPE_MARKER */
+       uint8_t version_number;
+
+       uint8_t tlv_type_marker; /* MARKER_TLV_TYPE_INFO on receive, rewritten to MARKER_TLV_TYPE_RESP for the reply */
+       uint8_t info_length;
+       uint16_t requester_port;
+       struct ether_addr requester_system;
+       uint32_t requester_transaction_id;
+       uint8_t reserved_2[2];
+
+       uint8_t tlv_type_terminator;
+       uint8_t terminator_length;
+       uint8_t reserved_90[90];
+} __attribute__((__packed__));
+
+struct marker_header { /* marker frame: Ethernet header plus marker PDU */
+       struct ether_hdr eth_hdr; /* s_addr is replaced with the slave MAC when a response is sent */
+       struct marker marker;
+} __attribute__((__packed__));
+
+struct rte_eth_bond_8023ad_conf { /* mode 4 timing configuration, all values in milliseconds */
+       uint32_t fast_periodic_ms; /* must be smaller than slow_periodic_ms */
+       uint32_t slow_periodic_ms; /* must be non-zero */
+       uint32_t short_timeout_ms; /* must be smaller than long_timeout_ms */
+       uint32_t long_timeout_ms; /* must be non-zero */
+       uint32_t aggregate_wait_timeout_ms; /* must be non-zero */
+       uint32_t tx_period_ms; /* must be non-zero; re-arms the tx machine timer after a LACPDU is queued */
+       uint32_t rx_marker_period_ms; /* must be non-zero; minimum spacing between answered markers */
+       uint32_t update_timeout_ms; /* must be non-zero; period of the mode 4 periodic callback */
+};
+
+struct rte_eth_bond_8023ad_slave_info { /* snapshot filled by rte_eth_bond_8023ad_slave_info() */
+       enum rte_bond_8023ad_selection selected; /* aggregator selection state */
+       uint8_t actor_state; /* mask of STATE_* bits */
+       struct port_params actor;
+       uint8_t partner_state; /* mask of STATE_* bits */
+       struct port_params partner;
+       uint8_t agg_port_id; /* port id of the aggregator this slave uses */
+};
+
+/**
+ * @internal
+ *
+ * Function returns current configuration of 802.3ad (mode 4) bonding.
+ *
+ * @param port_id   Bonding device id
+ * @param conf         Pointer to the configuration structure to fill.
+ *
+ * @return
+ *   0 - if ok
+ *   -EINVAL if conf is NULL or port_id is not a valid bonded port id
+ */
+int
+rte_eth_bond_8023ad_conf_get(uint8_t port_id,
+               struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Function sets new configuration of 802.3ad (mode 4) bonding.
+ *
+ * @param port_id   Bonding device id
+ * @param conf         Configuration, if NULL set default configuration.
+ * @return
+ *   0 - if ok
+ *   -EINVAL if configuration is invalid or port_id is not a valid bonded
+ *       port id.
+ */
+int
+rte_eth_bond_8023ad_setup(uint8_t port_id,
+               struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Function returns current state of given slave device.
+ *
+ * @param port_id   Bonding device id
+ * @param slave_id  Port id of valid slave.
+ * @param conf         buffer receiving the slave state
+ * @return
+ *   0 - if ok
+ *   -EINVAL if conf is NULL, port_id is not a valid bonded device in
+ *       802.3ad mode, or slave id is invalid (not a slave of given bonded
+ *       device or is not active).
+ */
+int
+rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
+               struct rte_eth_bond_8023ad_slave_info *conf);
+
+#endif /* RTE_ETH_BOND_8023AD_H_ */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h
new file mode 100644 (file)
index 0000000..8adee70
--- /dev/null
@@ -0,0 +1,308 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_8023AD_PRIVATE_H_
+#define RTE_ETH_BOND_8023AD_PRIVATE_H_
+
+#include <stdint.h>
+
+#include <rte_ether.h>
+#include <rte_byteorder.h>
+#include <rte_atomic.h>
+
+#include "rte_eth_bond_8023ad.h"
+
+#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS  100
+/** Maximum number of packets to one slave queued in TX ring. */
+#define BOND_MODE_8023AX_SLAVE_RX_PKTS        3
+/** Maximum number of LACP packets from one slave queued in TX ring. */
+#define BOND_MODE_8023AX_SLAVE_TX_PKTS        1
+/**
+ * Timeout definitions (5.4.4 in 802.1AX documentation).
+ */
+#define BOND_8023AD_FAST_PERIODIC_MS                900
+#define BOND_8023AD_SLOW_PERIODIC_MS              29000
+#define BOND_8023AD_SHORT_TIMEOUT_MS               3000
+#define BOND_8023AD_LONG_TIMEOUT_MS               90000
+#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS    60000
+#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS      2000
+#define BOND_8023AD_TX_MACHINE_PERIOD_MS            500
+#define BOND_8023AD_RX_MARKER_PERIOD_MS            2000
+
+/**
+ * Interval of showing warning message from state machines. All messages will
+ * be held (and gathered together) to prevent flooding.
+ * This is not part of the 802.1AX standard.
+ */
+#define BOND_8023AD_WARNINGS_PERIOD_MS             1000
+
+
+
+/**
+ * State machine flags
+ */
+#define SM_FLAGS_BEGIN                      0x0001
+#define SM_FLAGS_LACP_ENABLED               0x0002
+#define SM_FLAGS_ACTOR_CHURN                0x0004
+#define SM_FLAGS_PARTNER_CHURN              0x0008
+#define SM_FLAGS_MOVED                      0x0100
+#define SM_FLAGS_PARTNER_SHORT_TIMEOUT      0x0200
+#define SM_FLAGS_NTT                        0x0400
+
+#define BOND_LINK_FULL_DUPLEX_KEY           0x01
+#define BOND_LINK_SPEED_KEY_10M             0x02
+#define BOND_LINK_SPEED_KEY_100M            0x04
+#define BOND_LINK_SPEED_KEY_1000M           0x08
+#define BOND_LINK_SPEED_KEY_10G             0x10
+#define BOND_LINK_SPEED_KEY_20G             0x11
+#define BOND_LINK_SPEED_KEY_40G             0x12
+
+#define WRN_RX_MARKER_TO_FAST      0x01
+#define WRN_UNKNOWN_SLOW_TYPE      0x02
+#define WRN_UNKNOWN_MARKER_TYPE    0x04
+#define WRN_NOT_LACP_CAPABLE       0x08
+#define WRN_RX_QUEUE_FULL       0x10
+#define WRN_TX_QUEUE_FULL       0x20
+
+/* Generic bit-mask helpers: test, set and clear the bits _f in _variable. */
+#define CHECK_FLAGS(_variable, _f) ((_variable) & (_f))
+#define SET_FLAGS(_variable, _f) ((_variable) |= (_f))
+#define CLEAR_FLAGS(_variable, _f) ((_variable) &= ~(_f))
+
+/*
+ * Accessors for the sm_flags member of the object pointed to by _p. The flag
+ * name _f is pasted onto the SM_FLAGS_ prefix, e.g. SM_FLAG(p, NTT) tests
+ * SM_FLAGS_NTT. SM_FLAG() normalizes its result to 0 or 1.
+ */
+#define SM_FLAG(_p, _f) (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f))
+#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
+#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
+
+/*
+ * Same pattern for the actor_state member; _f is pasted onto the STATE_
+ * prefix (the STATE_* constants are not defined in this header).
+ */
+#define ACTOR_STATE(_p, _f) (!!CHECK_FLAGS((_p)->actor_state, STATE_ ## _f))
+#define ACTOR_STATE_SET(_p, _f) SET_FLAGS((_p)->actor_state, STATE_ ## _f)
+#define ACTOR_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->actor_state, STATE_ ## _f)
+
+/* Same pattern for the partner_state member. */
+#define PARTNER_STATE(_p, _f) (!!CHECK_FLAGS((_p)->partner_state, STATE_ ## _f))
+#define PARTNER_STATE_SET(_p, _f) SET_FLAGS((_p)->partner_state, STATE_ ## _f)
+#define PARTNER_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->partner_state, STATE_ ## _f)
+
+/** Variables associated with each port (5.4.7 in 802.1AX documentation). */
+struct port {
+       /**
+        * The operational values of the Actor's state parameters. Bitmask
+        * of port states.
+        */
+       uint8_t actor_state;
+
+       /** The operational Actor's port parameters */
+       struct port_params actor;
+
+       /**
+        * The operational value of the Actor's view of the current values of
+        * the Partner's state parameters. The Actor sets this variable either
+        * to the value received from the Partner in an LACPDU, or to the value
+        * of Partner_Admin_Port_State. Bitmask of port states.
+        */
+       uint8_t partner_state;
+
+       /** The operational Partner's port parameters */
+       struct port_params partner;
+
+       /* Additional port parameters not listed in documentation */
+       /** State machine flags (bitmask of SM_FLAGS_* values) */
+       uint16_t sm_flags;
+       enum rte_bond_8023ad_selection selected;
+
+       /* Per-port timers; their units are not visible in this header. */
+       uint64_t current_while_timer;
+       uint64_t periodic_timer;
+       uint64_t wait_while_timer;
+       uint64_t tx_machine_timer;
+       uint64_t tx_marker_timer;
+       /* Aggregator parameters */
+       /** Used aggregator port ID */
+       uint16_t aggregator_port_id;
+
+       /** Memory pool used to allocate rings */
+       struct rte_mempool *mbuf_pool;
+
+       /** Ring of LACP packets from RX burst function */
+       struct rte_ring *rx_ring;
+
+       /** Ring of slow protocol packets (LACP and MARKERS) to TX burst function */
+       struct rte_ring *tx_ring;
+
+       /** Timer which is also used as a mutex. While it is 0 (not running) a
+        * received marker packet may be answered; otherwise the packet shall be
+        * dropped. It is zeroed in the mode 4 callback function after it
+        * expires. */
+       volatile uint64_t rx_marker_timer;
+
+       uint64_t warning_timer;
+       /** Pending warnings (presumably a bitmask of WRN_* values - TODO confirm) */
+       volatile uint16_t warnings_to_show;
+};
+
+/**
+ * Internal mode 4 timing parameters. The members mirror those of
+ * struct rte_eth_bond_8023ad_conf one for one, but are 64 bit wide and,
+ * apart from update_timeout_us, carry no unit suffix.
+ * NOTE(review): presumably these are the configured values converted to an
+ * internal time base - confirm against bond_mode_8023ad_setup().
+ */
+struct mode8023ad_private {
+       uint64_t fast_periodic_timeout;
+       uint64_t slow_periodic_timeout;
+       uint64_t short_timeout;
+       uint64_t long_timeout;
+       uint64_t aggregate_wait_timeout;
+       uint64_t tx_period_timeout;
+       uint64_t rx_marker_timeout;
+       uint64_t update_timeout_us;
+};
+
+/**
+ * @internal
+ * The pool of *port* structures. The size of the pool
+ * is configured at compile-time in the <rte_eth_bond_8023ad.c> file.
+ */
+extern struct port mode_8023ad_ports[];
+
+/* Forward declaration */
+struct bond_dev_private;
+
+/**
+ * @internal
+ *
+ * Get configuration of bonded interface.
+ *
+ *
+ * @param dev Bonded interface
+ * @param conf returned configuration
+ */
+void
+bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
+               struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Set mode 4 configuration of bonded interface.
+ *
+ * @pre Bonded interface must be stopped.
+ *
+ * @param dev Bonded interface
+ * @param conf new configuration. If NULL set default configuration.
+ */
+void
+bond_mode_8023ad_setup(struct rte_eth_dev *dev,
+               struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Enables 802.1AX mode and all active slaves on bonded interface.
+ *
+ * @param dev Bonded interface
+ * @return
+ *  0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_enable(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Disables 802.1AX mode of the bonded interface and slaves.
+ *
+ * @param dev Bonded interface
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+int bond_mode_8023ad_disable(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Starts 802.3AX state machines management logic.
+ * @param dev Bonded interface
+ * @return
+ *   0 if machines was started, 1 if machines was already running,
+ *   negative value otherwise.
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Stops 802.3AX state machines management logic.
+ * @param dev Bonded interface
+ *
+ * The function is declared void, so no status is reported to the caller.
+ */
+void
+bond_mode_8023ad_stop(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Passes given slow packet to state machines management logic.
+ * @param internals Bonded device private data.
+ * @param slave_id Slave port id.
+ * @param pkt Slow packet.
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+       uint8_t slave_id, struct rte_mbuf *pkt);
+
+/**
+ * @internal
+ *
+ * Mode 4 hook for a slave that is becoming active. activate_slave() calls it
+ * right before the port is appended to the active slaves list.
+ *
+ * @param dev       Bonded interface.
+ * @param port_id   Slave port ID to be added
+ *
+ * The function is declared void, so no status is reported to the caller.
+ */
+void
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id);
+
+/**
+ * @internal
+ *
+ * Deinitializes and removes given slave from 802.1AX mode.
+ *
+ * @param dev       Bonded interface.
+ * @param slave_pos Position of slave in active_slaves array
+ *                  NOTE(review): deactivate_slave() passes the slave's port
+ *                  id here, not an array position - confirm which one the
+ *                  implementation expects.
+ *
+ * @return
+ *  0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
+
+/**
+ * Updates state when MAC was changed on bonded device or one of its slaves.
+ * @param bond_dev Bonded device
+ */
+void
+bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev);
+
+#endif /* RTE_ETH_BOND_8023AD_PRIVATE_H_ */
index f146bda..c8fb42c 100644 (file)
@@ -31,6 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <string.h>
+
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -38,6 +40,7 @@
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad_private.h"
 
 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
 
@@ -104,6 +107,49 @@ valid_slave_port_id(uint8_t port_id)
        return 0;
 }
 
+/*
+ * Add port_id to the bonded device's list of active slaves. In mode 4 the
+ * 802.3ad logic is told about the slave first.
+ */
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+       struct bond_dev_private *priv = eth_dev->data->dev_private;
+
+       if (priv->mode == BONDING_MODE_8023AD) {
+               bond_mode_8023ad_activate_slave(eth_dev, port_id);
+       }
+
+       /* Append at the tail and grow the active count. */
+       priv->active_slaves[priv->active_slave_count++] = port_id;
+}
+
+/*
+ * Remove port_id from the bonded device's list of active slaves. In mode 4
+ * the state machines are stopped while the list is edited and restarted
+ * afterwards if the bonded device is started.
+ */
+void
+deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+       struct bond_dev_private *priv = eth_dev->data->dev_private;
+       uint8_t remaining = priv->active_slave_count;
+       uint8_t pos;
+
+       if (priv->mode == BONDING_MODE_8023AD) {
+               bond_mode_8023ad_stop(eth_dev);
+               bond_mode_8023ad_deactivate_slave(eth_dev, port_id);
+       }
+
+       pos = find_slave_by_id(priv->active_slaves, remaining, port_id);
+
+       /* A position below the count means the slave is in the list: close
+        * the gap by moving the following entries one slot towards the front */
+       if (pos < remaining) {
+               remaining--;
+               memmove(&priv->active_slaves[pos],
+                               &priv->active_slaves[pos + 1],
+                               (remaining - pos) *
+                                       sizeof(priv->active_slaves[0]));
+       }
+
+       priv->active_slave_count = remaining;
+
+       if (eth_dev->data->dev_started && priv->mode == BONDING_MODE_8023AD) {
+               bond_mode_8023ad_start(eth_dev);
+       }
+}
+
 uint8_t
 number_of_sockets(void)
 {
@@ -216,15 +262,10 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        eth_dev->dev_ops = &default_dev_ops;
        eth_dev->pci_dev = pci_dev;
 
-       if (bond_ethdev_mode_set(eth_dev, mode)) {
-               RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
-                                eth_dev->data->port_id, mode);
-               goto err;
-       }
-
        rte_spinlock_init(&internals->lock);
 
        internals->port_id = eth_dev->data->port_id;
+       internals->mode = BONDING_MODE_INVALID;
        internals->current_primary_port = 0;
        internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
        internals->user_defined_mac = 0;
@@ -242,6 +283,14 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
        memset(internals->slaves, 0, sizeof(internals->slaves));
 
+       /* Set mode 4 default configuration */
+       bond_mode_8023ad_setup(eth_dev, NULL);
+       if (bond_ethdev_mode_set(eth_dev, mode)) {
+               RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
+                                eth_dev->data->port_id, mode);
+               goto err;
+       }
+
        return eth_dev->data->port_id;
 
 err:
@@ -349,14 +398,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                rte_eth_link_get_nowait(slave_port_id, &link_props);
 
                 if (link_props.link_status == 1)
-                       internals->active_slaves[internals->active_slave_count++] =
-                                       slave_port_id;
+                       activate_slave(bonded_eth_dev, slave_port_id);
        }
        return 0;
 
 }
 
-
 int
 rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
 {
@@ -381,31 +428,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
        return retval;
 }
 
-
 static int
 __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 {
+       struct rte_eth_dev *bonded_eth_dev;
        struct bond_dev_private *internals;
 
-       int i, slave_idx = -1;
+       int i, slave_idx;
 
        if (valid_slave_port_id(slave_port_id) != 0)
                return -1;
 
-       internals = rte_eth_devices[bonded_port_id].data->dev_private;
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+       internals = bonded_eth_dev->data->dev_private;
 
        /* first remove from active slave list */
-       for (i = 0; i < internals->active_slave_count; i++) {
-               if (internals->active_slaves[i] == slave_port_id)
-                       slave_idx = i;
-
-               /* shift active slaves up active array list */
-               if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
-                       internals->active_slaves[i] = internals->active_slaves[i+1];
-       }
+       slave_idx = find_slave_by_id(internals->active_slaves,
+               internals->active_slave_count, slave_port_id);
 
-       if (slave_idx >= 0)
-               internals->active_slave_count--;
+       if (slave_idx < internals->active_slave_count)
+               deactivate_slave(bonded_eth_dev, slave_port_id);
 
        slave_idx = -1;
        /* now find in slave list */
@@ -539,11 +581,12 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
 
        return internals->current_primary_port;
 }
+
 int
 rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
 {
        struct bond_dev_private *internals;
-       int i;
+       uint8_t i;
 
        if (valid_bonded_port_id(bonded_port_id) != 0)
                return -1;
@@ -675,7 +718,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
        return internals->balance_xmit_policy;
 }
 
-
 int
 rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
 {
@@ -731,7 +773,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
        return internals->link_down_delay_ms;
 }
 
-
 int
 rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
 
index d8ce681..bf7c1bc 100644 (file)
@@ -173,6 +173,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
 #ifdef RTE_MBUF_REFCNT
        case BONDING_MODE_BROADCAST:
 #endif
+       case BONDING_MODE_8023AD:
                return 0;
        default:
                RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
index cf2fbab..aa52813 100644 (file)
@@ -44,6 +44,7 @@
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad_private.h"
 
 static uint16_t
 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
@@ -90,6 +91,77 @@ bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                        bd_rx_q->queue_id, bufs, nb_pkts);
 }
 
+/*
+ * RX burst function for mode 4 (802.3ad).
+ *
+ * Polls every active slave in turn, appending received packets to bufs until
+ * nb_pkts is reached. Each received packet is then inspected:
+ *  - slow protocol frames (ether type ETHER_TYPE_SLOW) are handed to
+ *    bond_mode_8023ad_handle_slow_pkt() and removed from bufs;
+ *  - frames received while the slave's actor state lacks COLLECTING, or,
+ *    when the bonded device is not promiscuous, frames whose destination
+ *    address differs from the bonded device's MAC, are freed and removed.
+ *
+ * Returns the number of packets left in bufs.
+ */
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+               uint16_t nb_pkts)
+{
+       /* Cast to structure, containing bonded device's port id and queue id */
+       struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+       struct bond_dev_private *internals = bd_rx_q->dev_private;
+       struct ether_addr bond_mac;
+
+       struct ether_hdr *hdr;
+
+       /* Slow protocol ether type in network byte order, so it can be compared
+        * directly with the on-wire header field. */
+       const uint16_t ether_type_slow_be = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
+       uint16_t num_rx_total = 0;      /* Total number of received packets */
+       uint8_t slaves[RTE_MAX_ETHPORTS];
+       uint8_t slave_count;
+
+       uint8_t collecting;  /* current slave collecting status */
+       const uint8_t promisc = internals->promiscuous_en;
+       uint8_t i;
+       /* Packet indexes must be as wide as num_rx_total: an 8 bit index would
+        * wrap around for bursts of more than 255 packets. */
+       uint16_t j, k;
+
+       rte_eth_macaddr_get(internals->port_id, &bond_mac);
+       /* Copy slave list to protect against slave up/down changes during rx
+        * bursting */
+       slave_count = internals->active_slave_count;
+       memcpy(slaves, internals->active_slaves,
+                       sizeof(internals->active_slaves[0]) * slave_count);
+
+       for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
+               /* j is the index of the first packet received from this slave */
+               j = num_rx_total;
+               collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
+
+               /* Read packets from this slave */
+               num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+                               &bufs[num_rx_total], nb_pkts - num_rx_total);
+
+               for (k = j; k < 2 && k < num_rx_total; k++)
+                       rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
+
+               /* Handle slow protocol packets. */
+               while (j < num_rx_total) {
+                       if (j + 3 < num_rx_total)
+                               rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
+
+                       hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+                       /* Remove packet from array if it is slow packet or slave is not
+                        * in collecting state or bonding interface is not in promiscuous
+                        * mode and packet address does not match. */
+                       if (unlikely(hdr->ether_type == ether_type_slow_be ||
+                               !collecting || (!promisc &&
+                                       !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
+
+                               if (hdr->ether_type == ether_type_slow_be) {
+                                       bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
+                                               bufs[j]);
+                               } else
+                                       rte_pktmbuf_free(bufs[j]);
+
+                               /* Packet is managed by mode 4 or dropped, shift the array */
+                               num_rx_total--;
+                               if (j < num_rx_total) {
+                                       memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
+                                               (num_rx_total - j));
+                               }
+                       } else
+                               j++;
+               }
+       }
+
+       return num_rx_total;
+}
+
 static uint16_t
 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
@@ -143,7 +215,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                                tx_fail_total += tx_fail_slave;
 
                                memcpy(&bufs[nb_pkts - tx_fail_total],
-                                               &slave_bufs[i][num_tx_slave], tx_fail_slave * sizeof(bufs[0]));
+                                               &slave_bufs[i][num_tx_slave],
+                                               tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
@@ -338,14 +411,107 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
 
                                tx_fail_total += slave_tx_fail_count;
-                               memcpy(bufs[nb_pkts - tx_fail_total],
-                                               slave_bufs[i][num_tx_slave], slave_tx_fail_count);
+                               memcpy(&bufs[nb_pkts - tx_fail_total],
+                                               &slave_bufs[i][num_tx_slave],
+                                               slave_tx_fail_count * sizeof(bufs[0]));
                        }
 
                        num_tx_total += num_tx_slave;
                }
        }
 
+       return num_tx_total;
+}
+
+/*
+ * TX burst function for mode 4 (802.3ad).
+ *
+ * For every active slave, up to BOND_MODE_8023AX_SLAVE_TX_PKTS slow protocol
+ * packets are dequeued from the slave's tx_ring and placed at the head of
+ * that slave's output array. The packets in bufs are then spread, using
+ * xmit_slave_hash(), over the slaves whose actor state has DISTRIBUTING set.
+ *
+ * Slow packets a slave failed to send are freed. Data packets that could not
+ * be sent are moved to the end of bufs.
+ *
+ * Returns the number of packets from bufs that were sent; slow packets are
+ * not counted.
+ */
+static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+               uint16_t nb_pkts)
+{
+       struct bond_dev_private *internals;
+       struct bond_tx_queue *bd_tx_q;
+
+       uint8_t num_of_slaves;
+       uint8_t slaves[RTE_MAX_ETHPORTS];
+        /* positions in slaves, not port IDs */
+       uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
+       uint8_t distributing_count;
+
+       uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
+       uint16_t i, j, op_slave_idx;
+       /* Room for every data packet plus the slow packets of one slave */
+       const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
+
+       /* Allocate additional packets in case 8023AD mode. */
+       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
+       void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS];
+
+       /* Total amount of packets in slave_bufs */
+       uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+       /* Slow packets placed in each slave */
+       uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+       bd_tx_q = (struct bond_tx_queue *)queue;
+       internals = bd_tx_q->dev_private;
+
+       /* Copy slave list to protect against slave up/down changes during tx
+        * bursting */
+       num_of_slaves = internals->active_slave_count;
+       if (num_of_slaves < 1)
+               return num_tx_total;
+
+       memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+
+       /* Collect pending slow packets and note which slaves may distribute */
+       distributing_count = 0;
+       for (i = 0; i < num_of_slaves; i++) {
+               struct port *port = &mode_8023ad_ports[slaves[i]];
+
+               slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
+                               slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
+               slave_nb_pkts[i] = slave_slow_nb_pkts[i];
+
+               for (j = 0; j < slave_slow_nb_pkts[i]; j++)
+                       slave_bufs[i][j] = slow_pkts[j];
+
+               if (ACTOR_STATE(port, DISTRIBUTING))
+                       distributing_offsets[distributing_count++] = i;
+       }
+
+       /* Without a distributing slave the data packets are left untouched in
+        * bufs and only slow packets (if any) are sent below. */
+       if (likely(distributing_count > 0)) {
+               /* Populate slaves mbuf with the packets which are to be sent on it */
+               for (i = 0; i < nb_pkts; i++) {
+                       /* Select output slave using hash based on xmit policy */
+                       op_slave_idx = xmit_slave_hash(bufs[i], distributing_count,
+                                       internals->balance_xmit_policy);
+
+                       /* Populate slave mbuf arrays with mbufs for that slave. Use only
+                        * slaves that are currently distributing. */
+                       uint8_t slave_offset = distributing_offsets[op_slave_idx];
+                       slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
+                       slave_nb_pkts[slave_offset]++;
+               }
+       }
+
+       /* Send packet burst on each slave device */
+       for (i = 0; i < num_of_slaves; i++) {
+               if (slave_nb_pkts[i] == 0)
+                       continue;
+
+               num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+                               slave_bufs[i], slave_nb_pkts[i]);
+
+               /* If tx burst fails drop slow packets */
+               for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
+                       rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
+
+               /* From here on num_tx_slave is at least the slow packet count, so
+                * the difference is the number of data packets sent. */
+               num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
+               num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+
+               /* If tx burst fails move packets to end of bufs */
+               if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+                       /* NOTE(review): this j shadows the function-scope j */
+                       uint16_t j = nb_pkts - num_tx_fail_total;
+                       for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
+                               bufs[j] = slave_bufs[i][num_tx_slave];
+               }
+       }
 
        return num_tx_total;
 }
@@ -450,6 +616,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link,
        return 0;
 }
 
+/*
+ * Copy the first entry of eth_dev->data->mac_addrs into dst_mac_addr.
+ *
+ * Returns 0 on success, -1 (after logging an error) if eth_dev or
+ * dst_mac_addr is NULL.
+ */
+int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
+{
+       struct ether_addr *mac_addr;
+
+       if (eth_dev == NULL) {
+               RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+               return -1;
+       }
+
+       if (dst_mac_addr == NULL) {
+               RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+               return -1;
+       }
+
+       /* Dereference eth_dev only after it is known to be non-NULL */
+       mac_addr = eth_dev->data->mac_addrs;
+
+       ether_addr_copy(mac_addr, dst_mac_addr);
+       return 0;
+}
+
 int
 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
 {
@@ -458,7 +645,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
        mac_addr = eth_dev->data->mac_addrs;
 
        if (eth_dev == NULL) {
-               RTE_BOND_LOG(ERR,  "NULL pointer eth_dev specified");
+               RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }
 
@@ -499,6 +686,9 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
                        }
                }
                break;
+       case BONDING_MODE_8023AD:
+               bond_mode_8023ad_mac_address_update(bonded_eth_dev);
+               break;
        case BONDING_MODE_ACTIVE_BACKUP:
        default:
                for (i = 0; i < internals->slave_count; i++) {
@@ -551,6 +741,16 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
 #endif
+       case BONDING_MODE_8023AD:
+               if (bond_mode_8023ad_enable(eth_dev) != 0)
+                       return -1;
+
+               eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+               eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+               RTE_BOND_LOG(WARNING,
+                               "Using mode 4, it is necessary to do TX burst and RX burst "
+                               "at least every 100ms.");
+               break;
        default:
                return -1;
        }
@@ -762,6 +962,9 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
        if (internals->user_defined_primary_port)
                bond_ethdev_primary_set(internals, internals->primary_port);
 
+       if (internals->mode == BONDING_MODE_8023AD)
+               bond_mode_8023ad_start(eth_dev);
+
        return 0;
 }
 
@@ -769,6 +972,27 @@ static void
 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 {
        struct bond_dev_private *internals = eth_dev->data->dev_private;
+       uint8_t i;
+
+       if (internals->mode == BONDING_MODE_8023AD) {
+               struct port *port;
+               void *pkt = NULL;
+
+               bond_mode_8023ad_stop(eth_dev);
+
+               /* Discard all messages to/from mode 4 state machines */
+               for (i = 0; i < internals->slave_count; i++) {
+                       port = &mode_8023ad_ports[internals->slaves[i].port_id];
+
+                       RTE_VERIFY(port->rx_ring != NULL);
+                       while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
+                               rte_pktmbuf_free(pkt);
+
+                       RTE_VERIFY(port->tx_ring != NULL);
+                       while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
+                               rte_pktmbuf_free(pkt);
+               }
+       }
 
        internals->active_slave_count = 0;
        internals->link_status_polling_enabled = 0;
@@ -834,7 +1058,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
                                        0, dev->pci_dev->numa_node);
 
        if (bd_tx_q == NULL)
-                       return -1;
+               return -1;
 
        bd_tx_q->queue_id = tx_queue_id;
        bd_tx_q->dev_private = dev->data->dev_private;
@@ -865,7 +1089,6 @@ bond_ethdev_tx_queue_release(void *queue)
        rte_free(queue);
 }
 
-
 static void
 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
 {
@@ -1014,11 +1237,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
                for (i = 0; i < internals->slave_count; i++)
                        rte_eth_promiscuous_enable(internals->slaves[i].port_id);
                break;
+       /* In mode4 promiscuous mode is managed when slave is added/removed */
+       case BONDING_MODE_8023AD:
+               break;
        /* Promiscuous mode is propagated only to primary slave */
        case BONDING_MODE_ACTIVE_BACKUP:
        default:
                rte_eth_promiscuous_enable(internals->current_primary_port);
-
        }
 }
 
@@ -1040,6 +1265,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
                for (i = 0; i < internals->slave_count; i++)
                        rte_eth_promiscuous_disable(internals->slaves[i].port_id);
                break;
+       /* In mode4 promiscuous mode is managed when slave is added/removed */
+       case BONDING_MODE_8023AD:
+               break;
        /* Promiscuous mode is propagated only to primary slave */
        case BONDING_MODE_ACTIVE_BACKUP:
        default:
@@ -1065,7 +1293,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
        struct bond_dev_private *internals;
        struct rte_eth_link link;
 
-       int i, valid_slave = 0, active_pos = -1;
+       int i, valid_slave = 0;
+       uint8_t active_pos;
        uint8_t lsc_flag = 0;
 
        if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
@@ -1095,16 +1324,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
                return;
 
        /* Search for port in active port list */
-       for (i = 0; i < internals->active_slave_count; i++) {
-               if (port_id == internals->active_slaves[i]) {
-                       active_pos = i;
-                       break;
-               }
-       }
+       active_pos = find_slave_by_id(internals->active_slaves,
+                       internals->active_slave_count, port_id);
 
        rte_eth_link_get_nowait(port_id, &link);
        if (link.link_status) {
-               if (active_pos >= 0)
+               if (active_pos < internals->active_slave_count)
                        return;
 
                /* if no active slave ports then set this port to be primary port */
@@ -1118,21 +1343,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
                        link_properties_set(bonded_eth_dev,
                                        &(slave_eth_dev->data->dev_link));
                }
-               internals->active_slaves[internals->active_slave_count++] = port_id;
+
+               activate_slave(bonded_eth_dev, port_id);
 
                /* If user has defined the primary port then default to using it */
                if (internals->user_defined_primary_port &&
                                internals->primary_port == port_id)
                        bond_ethdev_primary_set(internals, port_id);
        } else {
-               if (active_pos < 0)
+               if (active_pos == internals->active_slave_count)
                        return;
 
                /* Remove from active slave list */
-               for (i = active_pos; i < (internals->active_slave_count - 1); i++)
-                       internals->active_slaves[i] = internals->active_slaves[i+1];
-
-               internals->active_slave_count--;
+               deactivate_slave(bonded_eth_dev, port_id);
 
                /* No active slaves, change link status to down and reset other
                 * link properties */
index 6254c84..600fc08 100644 (file)
@@ -42,6 +42,7 @@ extern "C" {
 #include <rte_spinlock.h>
 
 #include "rte_eth_bond.h"
+#include "rte_eth_bond_8023ad_private.h"
 
 #define PMD_BOND_SLAVE_PORT_KVARG                      ("slave")
 #define PMD_BOND_PRIMARY_SLAVE_KVARG           ("primary")
@@ -60,6 +61,8 @@ extern "C" {
 #define RTE_BOND_LOG(lvl, msg, ...)            \
        RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__)
 
+#define BONDING_MODE_INVALID 0xFF
+
 extern const char *pmd_bond_init_valid_arguments[];
 
 extern const char *driver_name;
@@ -89,7 +92,6 @@ struct bond_tx_queue {
        /**< Copy of TX configuration structure for queue */
 };
 
-
 /** Bonded slave devices structure */
 struct bond_ethdev_slave_ports {
        uint8_t slaves[RTE_MAX_ETHPORTS];       /**< Slave port id array */
@@ -124,7 +126,7 @@ struct bond_dev_private {
        uint8_t user_defined_mac;
        /**< Flag for whether MAC address is user defined or not */
        uint8_t promiscuous_en;
-       /**< Enabled/disable promiscuous mode on slave devices */
+       /**< Enabled/disable promiscuous mode on bonding device */
        uint8_t link_props_set;
        /**< flag to denote if the link properties are set */
 
@@ -144,6 +146,8 @@ struct bond_dev_private {
        struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
        /**< Arary of bonded slaves details */
 
+       struct mode8023ad_private mode4;
+
        struct rte_kvargs *kvlist;
 };
 
@@ -152,6 +156,20 @@ extern struct eth_dev_ops default_dev_ops;
 int
 valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
 
+/* Linear search of slaves[0..slaves_count) for slave_id. Returns the index of
+ * the first match, or slaves_count when slave_id is not present. */
+static inline uint8_t
+find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) {
+
+       uint8_t pos;
+       for (pos = 0; pos < slaves_count; pos++) {
+               if (slave_id == slaves[pos])
+                       break; /* first match wins; pos is its index */
+       }
+
+       return pos; /* equals slaves_count if the loop ran to completion */
+}
+
 int
 valid_port_id(uint8_t port_id);
 
@@ -161,6 +179,12 @@ valid_bonded_port_id(uint8_t port_id);
 int
 valid_slave_port_id(uint8_t port_id);
 
+void
+deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
+
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
+
 void
 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link);
@@ -174,6 +198,9 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link,
 int
 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
 
+int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
+
 int
 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);