Link bonding
M: Declan Doherty <declan.doherty@intel.com>
-F: lib/librte_pmd_bond/
+F: drivers/net/bonding/
F: doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
F: app/test/test_link_bonding*
F: examples/bond/
PROJECT_NAME = DPDK
INPUT = doc/api/doxy-api-index.md \
+ drivers/net/bonding \
lib/librte_eal/common/include \
lib/librte_eal/common/include/generic \
lib/librte_acl \
lib/librte_pipeline \
lib/librte_port \
lib/librte_power \
- lib/librte_pmd_bond \
lib/librte_reorder \
lib/librte_ring \
lib/librte_sched \
+-- librte_mempool # memory pool manager (fixedsized objects)
+-- librte_meter # QoS metering library
+-- librte_net # various IP-related headers
- +-- librte_pmd_bond # bonding poll mode driver
+-- librte_pmd_e1000 # 1GbE poll mode drivers (igb and em)
+-- librte_pmd_fm10k # Host interface PMD driver for FM10000 Series
+-- librte_pmd_ixgbe # 10GbE poll mode driver
drivers/net
+-- af_packet # poll mode driver based on linux af_packet
+ +-- bonding # bonding poll mode driver
Applications
------------
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += bonding
include $(RTE_SDK)/mk/rte.sharelib.mk
include $(RTE_SDK)/mk/rte.subdir.mk
--- /dev/null
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_bond.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_eth_bond_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c
+
+#
+# Export include files
+#
+SYMLINK-y-include += rte_eth_bond.h
+SYMLINK-y-include += rte_eth_bond_8023ad.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_malloc
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_BOND_H_
+#define _RTE_ETH_BOND_H_
+
+/**
+ * @file rte_eth_bond.h
+ *
+ * RTE Link Bonding Ethernet Device
+ * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple
+ * (slave) NICs into a single logical interface. The bonded device processes
+ * these interfaces based on the mode of operation specified and supported.
+ * This implementation supports 7 modes of operation: round robin, active
+ * backup, balance, broadcast, 802.3AD, adaptive TLB and adaptive load
+ * balancing, providing redundant links, fault tolerance and/or load balancing
+ * of network ports.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_ether.h>
+
+/* Supported modes of operation of link bonding library */
+
+#define BONDING_MODE_ROUND_ROBIN (0)
+/**< Round Robin (Mode 0).
+ * In this mode all transmitted packets will be balanced equally across all
+ * active slaves of the bonded device in a round robin fashion. */
+#define BONDING_MODE_ACTIVE_BACKUP (1)
+/**< Active Backup (Mode 1).
+ * In this mode all packets transmitted will be transmitted on the primary
+ * slave until such point as the primary slave is no longer available and then
+ * transmitted packets will be sent on the next available slaves. The primary
+ * slave can be defined by the user but defaults to the first active slave
+ * available if not specified. */
+#define BONDING_MODE_BALANCE (2)
+/**< Balance (Mode 2).
+ * In this mode all packets transmitted will be balanced across the available
+ * slaves using one of three available transmit policies - l2, l2+3 or l3+4.
+ * See BALANCE_XMIT_POLICY macros definitions for further details on transmit
+ * policies. */
+#define BONDING_MODE_BROADCAST (3)
+/**< Broadcast (Mode 3).
+ * In this mode all transmitted packets will be transmitted on all available
+ * active slaves of the bonded device. */
+#define BONDING_MODE_8023AD (4)
+/**< 802.3AD (Mode 4).
+ *
+ * This mode provides auto negotiation/configuration
+ * of peers as well as link status changes monitoring using out of band
+ * LACP (link aggregation control protocol) messages. For further details of
+ * LACP specification see the IEEE 802.3ad/802.1AX standards. It is also
+ * described here
+ * https://www.kernel.org/doc/Documentation/networking/bonding.txt.
+ *
+ * Important Usage Notes:
+ * - for LACP mode to work the rx/tx burst functions must be invoked
+ * at least once every 100ms, otherwise the out-of-band LACP messages will not
+ * be handled with the expected latency and this may cause the link status to be
+ * incorrectly marked as down or failure to correctly negotiate with peers.
+ * - For optimal performance during initial handshaking the array of mbufs provided
+ * to rx_burst should be at least 2 times the slave count size.
+ *
+ */
+#define BONDING_MODE_TLB (5)
+/**< Adaptive TLB (Mode 5)
+ * This mode provides an adaptive transmit load balancing. It dynamically
+ * changes the transmitting slave, according to the computed load. Statistics
+ * are collected in 100ms intervals and scheduled every 10ms */
+#define BONDING_MODE_ALB (6)
+/**< Adaptive Load Balancing (Mode 6)
+ * This mode includes adaptive TLB and receive load balancing (RLB). In RLB the
+ * bonding driver intercepts ARP replies sent by the local system and overwrites
+ * their source MAC address, so that different peers send data to the server on
+ * different slave interfaces. When the local system sends an ARP request, the
+ * driver saves the IP information from it. When the ARP reply from that peer is
+ * received, its MAC is stored, one of the slave MACs is assigned and an ARP
+ * reply is sent to that peer.
+ */
+
+/* Balance Mode Transmit Policies */
+#define BALANCE_XMIT_POLICY_LAYER2 (0)
+/**< Layer 2 (Ethernet MAC) */
+#define BALANCE_XMIT_POLICY_LAYER23 (1)
+/**< Layer 2+3 (Ethernet MAC + IP Addresses) transmit load balancing */
+#define BALANCE_XMIT_POLICY_LAYER34 (2)
+/**< Layer 3+4 (IP Addresses + UDP Ports) transmit load balancing */
+
+/**
+ * Create a bonded rte_eth_dev device
+ *
+ * @param name Name of new link bonding device.
+ * @param mode Mode to initialize bonding device in.
+ * @param socket_id Socket Id on which to allocate eth_dev resources.
+ *
+ * @return
+ * Port Id of created rte_eth_dev on success, negative value otherwise
+ */
+int
+rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id);
+
+/**
+ * Add a rte_eth_dev device as a slave to the bonded device
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param slave_port_id Port ID of slave device.
+ *
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Remove a slave rte_eth_dev device from the bonded device
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param slave_port_id Port ID of slave device.
+ *
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Set link bonding mode of bonded device
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param mode Bonding mode to set
+ *
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode);
+
+/**
+ * Get link bonding mode of bonded device
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * link bonding mode on success, negative value otherwise
+ */
+int
+rte_eth_bond_mode_get(uint8_t bonded_port_id);
+
+/**
+ * Set slave rte_eth_dev as primary slave of bonded device
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param slave_port_id Port ID of slave device.
+ *
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Get primary slave of bonded device
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * Port Id of primary slave on success, -1 on failure
+ */
+int
+rte_eth_bond_primary_get(uint8_t bonded_port_id);
+
+/**
+ * Populate an array with list of the slave port ids of the bonded device
+ *
+ * @param bonded_port_id Port ID of bonded eth_dev to interrogate
+ * @param slaves Array to be populated with the slaves
+ * @param len Length of slaves array
+ *
+ * @return
+ * Number of slaves associated with bonded device on success,
+ * negative value otherwise
+ */
+int
+rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len);
+
+/**
+ * Populate an array with list of the active slaves port id's of the bonded
+ * device.
+ *
+ * @param bonded_port_id Port ID of bonded eth_dev to interrogate
+ * @param slaves Array to be populated with the current active slaves
+ * @param len Length of slaves array
+ *
+ * @return
+ * Number of active slaves associated with bonded device on success,
+ * negative value otherwise
+ */
+int
+rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
+ uint8_t len);
+
+/**
+ * Set explicit MAC address to use on bonded device and its slaves.
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param mac_addr MAC Address to use on bonded device overriding
+ * slaves MAC addresses
+ *
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+ struct ether_addr *mac_addr);
+
+/**
+ * Reset bonded device to use MAC from primary slave on bonded device and its
+ * slaves.
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mac_address_reset(uint8_t bonded_port_id);
+
+/**
+ * Set the transmit policy for bonded device to use when it is operating in
+ * balance mode, this parameter is otherwise ignored in other modes of
+ * operation.
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param policy Balance mode transmission policy.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy);
+
+/**
+ * Get the transmit policy set on bonded device for balance mode operation
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * Balance transmit policy on success, negative value otherwise.
+ */
+int
+rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id);
+
+/**
+ * Set the link monitoring interval (in ms) for monitoring the link status of
+ * slave devices
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param internal_ms Monitoring interval in milliseconds
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+
+int
+rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms);
+
+/**
+ * Get the current link monitoring interval (in ms) for monitoring of the link
+ * status of slave devices
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * Monitoring interval on success, negative value otherwise.
+ */
+int
+rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id);
+
+
+/**
+ * Set the period in milliseconds for delaying the disabling of a bonded link
+ * when the link down status has been detected
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param delay_ms Delay period in milliseconds.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
+
+/**
+ * Get the period in milliseconds set for delaying the disabling of a bonded
+ * link when the link down status has been detected
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * Delay period on success, negative value otherwise.
+ */
+int
+rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id);
+
+/**
+ * Set the period in milliseconds for delaying the enabling of a bonded link
+ * when the link up status has been detected
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ * @param delay_ms Delay period in milliseconds.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
+
+/**
+ * Get the period in milliseconds set for delaying the enabling of a bonded
+ * link when the link up status has been detected
+ *
+ * @param bonded_port_id Port ID of bonded device.
+ *
+ * @return
+ * Delay period on success, negative value otherwise.
+ */
+int
+rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_cycles.h>
+
+#include "rte_eth_bond_private.h"
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
+#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
+ bond_dbg_get_time_diff_ms(), slave_id, \
+ __func__, ##__VA_ARGS__)
+
+static uint64_t start_time;
+
+static unsigned
+bond_dbg_get_time_diff_ms(void)
+{
+	/* Milliseconds elapsed since the reference timestamp kept in
+	 * start_time; the reference is latched from the TSC the first time it
+	 * is found to be zero. */
+	const uint64_t tsc_now = rte_rdtsc();
+	uint64_t elapsed_cycles;
+
+	if (!start_time)
+		start_time = tsc_now;
+
+	elapsed_cycles = tsc_now - start_time;
+	return (elapsed_cycles * 1000) / rte_get_tsc_hz();
+}
+
+static void
+bond_print_lacp(struct lacpdu *l)
+{
+	/* Dump a LACPDU to the debug log: actor and partner TLVs (with the MAC
+	 * rendered as text and the state byte decoded into labels), followed by
+	 * the collector and terminator fields. */
+	static const char * const state_labels[] = {
+		"ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
+	};
+
+	char a_address[18];
+	char p_address[18];
+	char a_state[256] = { 0 };
+	char p_state[256] = { 0 };
+	const uint8_t *mac;
+	int a_used = 0;
+	int p_used = 0;
+	uint8_t bit;
+
+	mac = l->actor.port_params.system.addr_bytes;
+	snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
+		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+
+	mac = l->partner.port_params.system.addr_bytes;
+	snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
+		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+
+	/* One label, followed by a space, per bit set in the actor state */
+	for (bit = 0; bit < 8; bit++) {
+		if (!((l->actor.state >> bit) & 1))
+			continue;
+
+		a_used += snprintf(a_state + a_used, RTE_DIM(a_state) - a_used,
+			"%s ", state_labels[bit]);
+	}
+
+	/* Same decoding for the partner state */
+	for (bit = 0; bit < 8; bit++) {
+		if (!((l->partner.state >> bit) & 1))
+			continue;
+
+		p_used += snprintf(p_state + p_used, RTE_DIM(p_state) - p_used,
+			"%s ", state_labels[bit]);
+	}
+
+	/* Drop the separator left behind by the last label */
+	if (a_used != 0 && a_state[a_used - 1] == ' ')
+		a_state[a_used - 1] = '\0';
+
+	if (p_used != 0 && p_state[p_used - 1] == ' ')
+		p_state[p_used - 1] = '\0';
+
+	RTE_LOG(DEBUG, PMD, "LACP: {\n"
+		" subtype= %02X\n"
+		" ver_num=%02X\n"
+		" actor={ tlv=%02X, len=%02X\n"
+		" pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
+		" state={ %s }\n"
+		" }\n"
+		" partner={ tlv=%02X, len=%02X\n"
+		" pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
+		" state={ %s }\n"
+		" }\n"
+		" collector={info=%02X, length=%02X, max_delay=%04X\n, "
+		"type_term=%02X, terminator_length = %02X}\n",
+		l->subtype,
+		l->version_number,
+		l->actor.tlv_type_info,
+		l->actor.info_length,
+		l->actor.port_params.system_priority,
+		a_address,
+		l->actor.port_params.key,
+		l->actor.port_params.port_priority,
+		l->actor.port_params.port_number,
+		a_state,
+		l->partner.tlv_type_info,
+		l->partner.info_length,
+		l->partner.port_params.system_priority,
+		p_address,
+		l->partner.port_params.key,
+		l->partner.port_params.port_priority,
+		l->partner.port_params.port_number,
+		p_state,
+		l->tlv_type_collector_info,
+		l->collector_info_length,
+		l->collector_max_delay,
+		l->tlv_type_terminator,
+		l->terminator_length);
+}
+#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
+#else
+#define BOND_PRINT_LACP(lacpdu) do { } while (0)
+#define MODE4_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+static const struct ether_addr lacp_mac_addr = { /* NOTE(review): presumably the Slow Protocols multicast destination used for LACP frames - confirm */
+ .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
+};
+
+struct port mode_8023ad_ports[RTE_MAX_ETHPORTS]; /* mode 4 state of every port, indexed by slave port id */
+
+static void
+timer_cancel(uint64_t *timer)
+{
+	/* Zero is the value timer_is_stopped() recognises as "stopped" */
+	timer[0] = 0;
+}
+
+static void
+timer_set(uint64_t *timer, uint64_t timeout)
+{
+	/* Deadline is expressed in TSC cycles: current TSC plus timeout */
+	const uint64_t deadline = rte_rdtsc() + timeout;
+
+	*timer = deadline;
+}
+
+/* Move the deadline of the given timer to the current TSC value so that
+ * timer_is_expired() reports it as expired once the TSC has advanced. */
+static void
+timer_force_expired(uint64_t *timer)
+{
+	const uint64_t tsc_now = rte_rdtsc();
+
+	*timer = tsc_now;
+}
+
+static bool
+timer_is_stopped(uint64_t *timer)
+{
+	/* A stopped timer is one whose stored value is zero */
+	if (*timer != 0)
+		return false;
+
+	return true;
+}
+
+static bool
+timer_is_expired(uint64_t *timer)
+{
+	/* Expired means the stored deadline lies strictly before the current
+	 * TSC value. */
+	return rte_rdtsc() > *timer;
+}
+
+/* A timer is running when it has been armed (not stopped) and its deadline
+ * has not passed yet (not expired). */
+static bool
+timer_is_running(uint64_t *timer)
+{
+	if (timer_is_stopped(timer))
+		return false;
+
+	return !timer_is_expired(timer);
+}
+
+static void
+set_warning_flags(struct port *port, uint16_t flags)
+{
+	/* OR the given flags into port->warnings_to_show, retrying the
+	 * compare-and-set until it succeeds. */
+	uint16_t seen;
+	uint16_t merged;
+
+	for (;;) {
+		seen = port->warnings_to_show;
+		merged = seen | flags;
+
+		if (unlikely(rte_atomic16_cmpset(&port->warnings_to_show, seen,
+				merged) == 0))
+			continue;
+
+		break;
+	}
+}
+
+/**
+ * Log and clear the warnings accumulated for a slave port.
+ *
+ * The pending warning mask of the port is fetched and reset to zero with a
+ * compare-and-set loop. Nothing is logged when the mask was empty or when the
+ * port's warning timer has not expired yet; otherwise the timer is re-armed
+ * for BOND_8023AD_WARNINGS_PERIOD_MS and one message per pending flag is
+ * emitted.
+ *
+ * @param slave_id Index into mode_8023ad_ports of the port to report on.
+ */
+static void
+show_warnings(uint8_t slave_id)
+{
+	struct port *port = &mode_8023ad_ports[slave_id];
+	/* Same width as the mask built by set_warning_flags() and swapped by
+	 * rte_atomic16_cmpset(): a narrower local would lose high flags and
+	 * could never match the stored value again. */
+	uint16_t warnings;
+
+	do {
+		warnings = port->warnings_to_show;
+	} while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
+
+	if (!warnings)
+		return;
+
+	/* Rate limit: warnings fetched while the timer is still pending are
+	 * dropped without being logged. */
+	if (!timer_is_expired(&port->warning_timer))
+		return;
+
+	timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
+			rte_get_tsc_hz() / 1000);
+
+	if (warnings & WRN_RX_QUEUE_FULL) {
+		RTE_LOG(DEBUG, PMD,
+			"Slave %u: failed to enqueue LACP packet into RX ring.\n"
+			"Receive and transmit functions must be invoked on bonded\n"
+			"interface at least 10 times per second or LACP will not\n"
+			"work correctly\n", slave_id);
+	}
+
+	if (warnings & WRN_TX_QUEUE_FULL) {
+		RTE_LOG(DEBUG, PMD,
+			"Slave %u: failed to enqueue LACP packet into TX ring.\n"
+			"Receive and transmit functions must be invoked on bonded\n"
+			"interface at least 10 times per second or LACP will not\n"
+			"work correctly\n", slave_id);
+	}
+
+	if (warnings & WRN_RX_MARKER_TO_FAST)
+		RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id);
+
+	/* The two messages below used to lack the terminating newline */
+	if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
+		RTE_LOG(INFO, PMD,
+			"Slave %u: ignoring unknown slow protocol frame type\n", slave_id);
+	}
+
+	if (warnings & WRN_UNKNOWN_MARKER_TYPE)
+		RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type\n", slave_id);
+
+	if (warnings & WRN_NOT_LACP_CAPABLE)
+		MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
+}
+
+static void
+record_default(struct port *port)
+{
+	/* Partner admin parameters are not implemented, so the recorded partner
+	 * parameters are left untouched (last known values). Only the actor is
+	 * flagged as using defaulted partner information and the partner state
+	 * is reset to LACP_ACTIVE. */
+	ACTOR_STATE_SET(port, DEFAULTED);
+	port->partner_state = STATE_LACP_ACTIVE;
+}
+
+/** Function handles rx state machine.
+ *
+ * This function implements Receive State Machine from point 5.4.12 in
+ * 802.1AX documentation. It should be called periodically.
+ *
+ * @param internals Bonded device private data; supplies the mode 4 short
+ * and long timeouts used to arm current_while_timer.
+ * @param slave_id Index into mode_8023ad_ports of the port to process.
+ * @param lacp LACPDU received on that port, or NULL when there is none.
+ */
+static void
+rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
+ struct lacpdu *lacp)
+{
+ struct port *agg, *port = &mode_8023ad_ports[slave_id];
+ uint64_t timeout;
+
+ if (SM_FLAG(port, BEGIN)) {
+ /* Initialize stuff */
+ MODE4_DEBUG("-> INITIALIZE\n");
+ SM_FLAG_CLR(port, MOVED);
+ port->selected = UNSELECTED;
+
+ record_default(port);
+
+ ACTOR_STATE_CLR(port, EXPIRED);
+ timer_cancel(&port->current_while_timer);
+
+ /* DISABLED: On initialization partner is out of sync */
+ PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+ /* LACP DISABLED stuff if LACP not enabled on this port */
+ if (!SM_FLAG(port, LACP_ENABLED))
+ PARTNER_STATE_CLR(port, AGGREGATION);
+ else
+ PARTNER_STATE_SET(port, AGGREGATION);
+ }
+
+ if (!SM_FLAG(port, LACP_ENABLED)) {
+ /* Update parameters only if state changed */
+ if (!timer_is_stopped(&port->current_while_timer)) {
+ port->selected = UNSELECTED;
+ record_default(port);
+ PARTNER_STATE_CLR(port, AGGREGATION);
+ ACTOR_STATE_CLR(port, EXPIRED);
+ timer_cancel(&port->current_while_timer);
+ }
+ return;
+ }
+
+ if (lacp) {
+ MODE4_DEBUG("LACP -> CURRENT\n");
+ BOND_PRINT_LACP(lacp);
+ /* Update selected flag. If partner parameters are defaulted assume they
+ * match. If not defaulted compare LACP actor with the port's partner
+ * params. */
+ if (!ACTOR_STATE(port, DEFAULTED) &&
+ (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
+ || memcmp(&port->partner, &lacp->actor.port_params,
+ sizeof(port->partner)) != 0)) {
+ MODE4_DEBUG("selected <- UNSELECTED\n");
+ port->selected = UNSELECTED;
+ }
+
+ /* Record this PDU actor params as partner params */
+ memcpy(&port->partner, &lacp->actor.port_params,
+ sizeof(struct port_params));
+ port->partner_state = lacp->actor.state;
+
+ /* Partner parameters are not defaulted any more */
+ ACTOR_STATE_CLR(port, DEFAULTED);
+
+ /* If LACP partner params match this port actor params */
+ agg = &mode_8023ad_ports[port->aggregator_port_id];
+ bool match = port->actor.system_priority ==
+ lacp->partner.port_params.system_priority &&
+ is_same_ether_addr(&agg->actor.system,
+ &lacp->partner.port_params.system) &&
+ port->actor.port_priority ==
+ lacp->partner.port_params.port_priority &&
+ port->actor.port_number ==
+ lacp->partner.port_params.port_number;
+
+ /* Update NTT if the partner's information is outdated (xored and masked
+ * bits are set)*/
+ uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
+ STATE_SYNCHRONIZATION | STATE_AGGREGATION;
+
+ if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
+ match == false) {
+ SM_FLAG_SET(port, NTT);
+ }
+
+ /* If LACP partner params match this port actor params */
+ if (match == true && ACTOR_STATE(port, AGGREGATION) ==
+ PARTNER_STATE(port, AGGREGATION))
+ PARTNER_STATE_SET(port, SYNCHRONIZATION);
+ else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
+ AGGREGATION))
+ PARTNER_STATE_SET(port, SYNCHRONIZATION);
+ else
+ PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+ if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
+ timeout = internals->mode4.short_timeout;
+ else
+ timeout = internals->mode4.long_timeout;
+
+ timer_set(&port->current_while_timer, timeout);
+ ACTOR_STATE_CLR(port, EXPIRED);
+ return; /* No state change */
+ }
+
+ /* If CURRENT state timer is not running (stopped or expired)
+ * transit to EXPIRED state from DISABLED or CURRENT */
+ if (!timer_is_running(&port->current_while_timer)) {
+ ACTOR_STATE_SET(port, EXPIRED);
+ PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+ PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
+ timer_set(&port->current_while_timer, internals->mode4.short_timeout);
+ }
+}
+
+/**
+ * Function handles periodic tx state machine.
+ *
+ * Function implements Periodic Transmission state machine from point 5.4.13
+ * in 802.1AX documentation. It should be called periodically.
+ *
+ * @param internals Bonded device private data; supplies the mode 4 fast and
+ * slow periodic timeouts.
+ * @param slave_id Index into mode_8023ad_ports of the port to process.
+ */
+static void
+periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
+{
+	struct port *port = &mode_8023ad_ports[slave_id];
+	/* Calculate if either side is LACP enabled */
+	uint64_t timeout;
+	uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
+		PARTNER_STATE(port, LACP_ACTIVE);
+
+	uint8_t is_partner_fast, was_partner_fast;
+	/* No periodic is on BEGIN, LACP DISABLE or when both sides are passive */
+	if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
+		timer_cancel(&port->periodic_timer);
+		timer_force_expired(&port->tx_machine_timer);
+		SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
+
+		MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
+			SM_FLAG(port, BEGIN) ? "begind " : "",
+			SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
+			active ? "LACP active " : "LACP pasive ");
+		return;
+	}
+
+	is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
+	was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
+
+	/* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
+	 * Other case: check if timer expired or partner's settings changed. */
+	if (!timer_is_stopped(&port->periodic_timer)) {
+		if (timer_is_expired(&port->periodic_timer)) {
+			SM_FLAG_SET(port, NTT);
+		} else if (is_partner_fast != was_partner_fast) {
+			/* Partner's timeout was slow and now it is fast -> send LACP.
+			 * In the other case (was fast and now it is slow) just switch
+			 * the timeout to slow without forcing a LACP transmission
+			 * (because the standard says so). The test used to be
+			 * inverted, forcing the transmission on fast -> slow only. */
+			if (is_partner_fast)
+				SM_FLAG_SET(port, NTT);
+		} else
+			return; /* Nothing changed */
+	}
+
+	/* Handle state transition to FAST/SLOW LACP timeout */
+	if (is_partner_fast) {
+		timeout = internals->mode4.fast_periodic_timeout;
+		SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
+	} else {
+		timeout = internals->mode4.slow_periodic_timeout;
+		SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
+	}
+
+	timer_set(&port->periodic_timer, timeout);
+}
+
+/**
+ * Function handles mux state machine.
+ *
+ * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
+ * It should be called periodically.
+ *
+ * @param internals Bonded device private data; supplies the mode 4
+ * aggregate wait timeout and the bonded port id used in log messages.
+ * @param slave_id Index into mode_8023ad_ports of the port to process.
+ */
+static void
+mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
+{
+ struct port *port = &mode_8023ad_ports[slave_id];
+
+ /* Save current state for later use */
+ const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+ STATE_COLLECTING;
+
+ /* Enter DETACHED state on BEGIN condition or from any other state if
+ * port was unselected */
+ if (SM_FLAG(port, BEGIN) ||
+ port->selected == UNSELECTED || (port->selected == STANDBY &&
+ (port->actor_state & state_mask) != 0)) {
+ /* detach mux from aggregator */
+ port->actor_state &= ~state_mask;
+ /* Set ntt to true if BEGIN condition or transition from any other state
+ * which is indicated that wait_while_timer was started */
+ if (SM_FLAG(port, BEGIN) ||
+ !timer_is_stopped(&port->wait_while_timer)) {
+ SM_FLAG_SET(port, NTT);
+ MODE4_DEBUG("-> DETACHED\n");
+ }
+ timer_cancel(&port->wait_while_timer);
+ }
+
+ if (timer_is_stopped(&port->wait_while_timer)) {
+ if (port->selected == SELECTED || port->selected == STANDBY) {
+ timer_set(&port->wait_while_timer,
+ internals->mode4.aggregate_wait_timeout);
+
+ MODE4_DEBUG("DETACHED -> WAITING\n");
+ }
+ /* Waiting state entered */
+ return;
+ }
+
+ /* Transit next state if port is ready */
+ if (!timer_is_expired(&port->wait_while_timer))
+ return;
+
+ if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
+ !PARTNER_STATE(port, SYNCHRONIZATION)) {
+ /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
+ * sync transit to ATTACHED state. */
+ ACTOR_STATE_CLR(port, DISTRIBUTING);
+ ACTOR_STATE_CLR(port, COLLECTING);
+ /* Clear actor sync to activate transit to ATTACHED in condition below */
+ ACTOR_STATE_CLR(port, SYNCHRONIZATION);
+ MODE4_DEBUG("Out of sync -> ATTACHED\n");
+ }
+
+ if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
+ /* attach mux to aggregator */
+ RTE_VERIFY((port->actor_state & (STATE_COLLECTING |
+ STATE_DISTRIBUTING)) == 0);
+
+ ACTOR_STATE_SET(port, SYNCHRONIZATION);
+ SM_FLAG_SET(port, NTT);
+ MODE4_DEBUG("ATTACHED Entered\n");
+ } else if (!ACTOR_STATE(port, COLLECTING)) {
+ /* Start collecting if in sync */
+ if (PARTNER_STATE(port, SYNCHRONIZATION)) {
+ MODE4_DEBUG("ATTACHED -> COLLECTING\n");
+ ACTOR_STATE_SET(port, COLLECTING);
+ SM_FLAG_SET(port, NTT);
+ }
+ } else if (ACTOR_STATE(port, COLLECTING)) {
+ /* Check if partner is in COLLECTING state. If so this port can
+ * distribute frames to it */
+ if (!ACTOR_STATE(port, DISTRIBUTING)) {
+ if (PARTNER_STATE(port, COLLECTING)) {
+ /* Enable DISTRIBUTING if partner is collecting */
+ ACTOR_STATE_SET(port, DISTRIBUTING);
+ SM_FLAG_SET(port, NTT);
+ MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
+ RTE_LOG(INFO, PMD,
+ "Bond %u: slave id %u distributing started.\n",
+ internals->port_id, slave_id);
+ }
+ } else {
+ if (!PARTNER_STATE(port, COLLECTING)) {
+ /* Disable DISTRIBUTING (enter COLLECTING state) if partner
+ * is not collecting */
+ ACTOR_STATE_CLR(port, DISTRIBUTING);
+ SM_FLAG_SET(port, NTT);
+ MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
+ RTE_LOG(INFO, PMD,
+ "Bond %u: slave id %u distributing stopped.\n",
+ internals->port_id, slave_id);
+ }
+ }
+ }
+}
+
+/**
+ * Function handles transmit state machine.
+ *
+ * Function implements Transmit Machine from point 5.4.16 in 802.1AX
+ * documentation.
+ *
+ * When the NTT flag is set and tx_machine_timer has expired, builds one
+ * LACPDU for the slave and queues it on the slave's tx_ring. On success the
+ * tx_machine_timer is re-armed and NTT is cleared. If the mbuf allocation
+ * fails or the ring is full, NTT is left set so a later call retries.
+ *
+ * @param internals Bonded device private data (provides tx_period_timeout).
+ * @param slave_id Port id of the slave; used as index into mode_8023ad_ports.
+ */
+static void
+tx_machine(struct bond_dev_private *internals, uint8_t slave_id)
+{
+ struct port *agg, *port = &mode_8023ad_ports[slave_id];
+
+ struct rte_mbuf *lacp_pkt = NULL;
+ struct lacpdu_header *hdr;
+ struct lacpdu *lacpdu;
+
+ /* If periodic timer is not running periodic machine is in NO PERIODIC and
+ * according to 802.3ax standard tx machine should not transmit any frames
+ * and set ntt to false. */
+ if (timer_is_stopped(&port->periodic_timer))
+ SM_FLAG_CLR(port, NTT);
+
+ /* Nothing to transmit */
+ if (!SM_FLAG(port, NTT))
+ return;
+
+ /* Rate limit: wait until the timer armed by the previous transmission
+ * has expired */
+ if (!timer_is_expired(&port->tx_machine_timer))
+ return;
+
+ lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
+ if (lacp_pkt == NULL) {
+ RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
+ return;
+ }
+
+ /* Frame consists of the Ethernet header followed by the LACPDU only */
+ lacp_pkt->data_len = sizeof(*hdr);
+ lacp_pkt->pkt_len = sizeof(*hdr);
+
+ hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
+
+ /* Source and destination MAC */
+ ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
+ rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
+ hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
+
+ /* Zero the whole LACPDU so reserved fields are transmitted as zeros */
+ lacpdu = &hdr->lacpdu;
+ memset(lacpdu, 0, sizeof(*lacpdu));
+
+ /* Initialize LACP part */
+ lacpdu->subtype = SLOW_SUBTYPE_LACP;
+ lacpdu->version_number = 1;
+
+ /* ACTOR */
+ lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
+ lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
+ memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
+ sizeof(port->actor));
+ /* The advertised system ID is overwritten with the one of the aggregator
+ * this port currently uses */
+ agg = &mode_8023ad_ports[port->aggregator_port_id];
+ ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
+ lacpdu->actor.state = port->actor_state;
+
+ /* PARTNER */
+ lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
+ lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
+ memcpy(&lacpdu->partner.port_params, &port->partner,
+ sizeof(struct port_params));
+ lacpdu->partner.state = port->partner_state;
+
+ /* Other fields */
+ lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
+ /* 0x10 = 16 bytes: type + length + max delay + 12 reserved bytes as laid
+ * out in struct lacpdu */
+ lacpdu->collector_info_length = 0x10;
+ lacpdu->collector_max_delay = 0;
+
+ lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
+ lacpdu->terminator_length = 0;
+
+ if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
+ /* If TX ring full, drop packet and free message. Retransmission
+ * will happen in next function call. */
+ rte_pktmbuf_free(lacp_pkt);
+ set_warning_flags(port, WRN_TX_QUEUE_FULL);
+ return;
+ }
+
+ MODE4_DEBUG("sending LACP frame\n");
+ BOND_PRINT_LACP(lacpdu);
+
+ /* Frame queued: re-arm the rate limit timer and clear the request */
+ timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
+ SM_FLAG_CLR(port, NTT);
+}
+
+/**
+ * Function assigns port to aggregator.
+ *
+ * Walks the active slaves looking for a port that is its own aggregator and
+ * whose actor key and partner parameters match those of the given port. When
+ * no such port exists the given port becomes its own aggregator. The port is
+ * always left in SELECTED state.
+ *
+ * @param internals	Pointer to bond_dev_private structure.
+ * @param slave_id	Port id of the slave to assign.
+ */
+static void
+selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
+{
+	struct port *port = &mode_8023ad_ports[slave_id];
+	uint8_t *active = internals->active_slaves;
+	uint8_t active_cnt = internals->active_slave_count;
+	/* By default, port uses itself as aggregator */
+	uint8_t new_agg_id = slave_id;
+	uint8_t idx;
+
+	/* Search for aggregator suitable for this port */
+	for (idx = 0; idx < active_cnt; ++idx) {
+		struct port *cand = &mode_8023ad_ports[active[idx]];
+
+		/* Skip ports that are not aggregators */
+		if (cand->aggregator_port_id != active[idx])
+			continue;
+
+		/* Actor system ID is not checked since all slave devices have the
+		 * same ID (MAC address). */
+		if (cand->actor.key != port->actor.key)
+			continue;
+		if (cand->partner.system_priority != port->partner.system_priority)
+			continue;
+		if (is_same_ether_addr(&cand->partner.system,
+				&port->partner.system) != 1)
+			continue;
+		if (cand->partner.key != port->partner.key)
+			continue;
+
+		/* A partner must be known for this port */
+		if (is_zero_ether_addr(&port->partner.system) == 1)
+			continue;
+
+		/* Candidate key must carry the full duplex bit */
+		if ((cand->actor.key &
+				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0)
+			continue;
+
+		new_agg_id = active[idx];
+		break;
+	}
+
+	if (new_agg_id != port->aggregator_port_id) {
+		port->aggregator_port_id = new_agg_id;
+
+		MODE4_DEBUG("-> SELECTED: ID=%3u\n"
+			"\t%s aggregator ID=%3u\n",
+			port->aggregator_port_id,
+			port->aggregator_port_id == slave_id ?
+				"aggregator not found, using default" : "aggregator found",
+			port->aggregator_port_id);
+	}
+
+	port->selected = SELECTED;
+}
+
+/*
+ * Function maps DPDK speed to bonding speed stored in key field.
+ * ETH_LINK_SPEED_AUTONEG maps to 0; any speed not listed maps to 0xFFFF.
+ */
+static uint16_t
+link_speed_key(uint16_t speed)
+{
+	switch (speed) {
+	case ETH_LINK_SPEED_AUTONEG:
+		return 0x00;
+	case ETH_LINK_SPEED_10:
+		return BOND_LINK_SPEED_KEY_10M;
+	case ETH_LINK_SPEED_100:
+		return BOND_LINK_SPEED_KEY_100M;
+	case ETH_LINK_SPEED_1000:
+		return BOND_LINK_SPEED_KEY_1000M;
+	case ETH_LINK_SPEED_10G:
+		return BOND_LINK_SPEED_KEY_10G;
+	case ETH_LINK_SPEED_20G:
+		return BOND_LINK_SPEED_KEY_20G;
+	case ETH_LINK_SPEED_40G:
+		return BOND_LINK_SPEED_KEY_40G;
+	default:
+		/* Unknown speed */
+		return 0xFFFF;
+	}
+}
+
+/**
+ * Periodic alarm callback driving the mode 4 state machines.
+ *
+ * For every active slave it first refreshes the actor key (link speed and
+ * duplex) and the actor system address. It then runs, per slave, the RX,
+ * periodic, mux and TX machines followed by the selection logic. Slaves whose
+ * key lacks the full duplex bit are put into BEGIN state and skipped.
+ * Finally the callback re-arms itself using mode4.update_timeout_us.
+ *
+ * @param arg Pointer to the bonded rte_eth_dev.
+ */
+static void
+bond_mode_8023ad_periodic_cb(void *arg)
+{
+ struct rte_eth_dev *bond_dev = arg;
+ struct bond_dev_private *internals = bond_dev->data->dev_private;
+ struct port *port;
+ struct rte_eth_link link_info;
+ struct ether_addr slave_addr;
+
+ void *pkt = NULL;
+ uint8_t i, slave_id;
+
+
+ /* Update link status on each port */
+ for (i = 0; i < internals->active_slave_count; i++) {
+ uint16_t key;
+
+ slave_id = internals->active_slaves[i];
+ rte_eth_link_get(slave_id, &link_info);
+ rte_eth_macaddr_get(slave_id, &slave_addr);
+
+ /* Key layout: bit 0 is the full duplex flag, the speed code is stored
+ * shifted left by one. Link down yields key 0. */
+ if (link_info.link_status != 0) {
+ key = link_speed_key(link_info.link_speed) << 1;
+ if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
+ key |= BOND_LINK_FULL_DUPLEX_KEY;
+ } else
+ key = 0;
+
+ port = &mode_8023ad_ports[slave_id];
+
+ /* The key is stored in big endian, so compare in that byte order */
+ key = rte_cpu_to_be_16(key);
+ if (key != port->actor.key) {
+ if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
+ set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
+
+ port->actor.key = key;
+ SM_FLAG_SET(port, NTT);
+ }
+
+ /* Track MAC address changes; only a port that is its own aggregator
+ * requests a transmission here */
+ if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
+ ether_addr_copy(&slave_addr, &port->actor.system);
+ if (port->aggregator_port_id == slave_id)
+ SM_FLAG_SET(port, NTT);
+ }
+ }
+
+ /* Run the state machines of each port */
+ for (i = 0; i < internals->active_slave_count; i++) {
+ slave_id = internals->active_slaves[i];
+ port = &mode_8023ad_ports[slave_id];
+
+ if ((port->actor.key &
+ rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
+
+ SM_FLAG_SET(port, BEGIN);
+
+ /* LACP is disabled on half duplex or link is down */
+ if (SM_FLAG(port, LACP_ENABLED)) {
+ /* If port was enabled set it to BEGIN state */
+ SM_FLAG_CLR(port, LACP_ENABLED);
+ ACTOR_STATE_CLR(port, DISTRIBUTING);
+ ACTOR_STATE_CLR(port, COLLECTING);
+ }
+
+ /* Skip this port processing */
+ continue;
+ }
+
+ SM_FLAG_SET(port, LACP_ENABLED);
+
+ /* Find LACP packet to this port. Do not check subtype, it is done in
+ * function that queued packet. At most one packet is consumed per
+ * port on each invocation. */
+ if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
+ struct rte_mbuf *lacp_pkt = pkt;
+ struct lacpdu_header *lacp;
+
+ lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
+ RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
+
+ /* This is LACP frame so pass it to rx_machine */
+ rx_machine(internals, slave_id, &lacp->lacpdu);
+ rte_pktmbuf_free(lacp_pkt);
+ } else
+ rx_machine(internals, slave_id, NULL);
+
+ periodic_machine(internals, slave_id);
+ mux_machine(internals, slave_id);
+ tx_machine(internals, slave_id);
+ selection_logic(internals, slave_id);
+
+ /* BEGIN is only valid for a single pass through the machines */
+ SM_FLAG_CLR(port, BEGIN);
+ show_warnings(slave_id);
+ }
+
+ /* Re-arm the alarm for the next run */
+ rte_eal_alarm_set(internals->mode4.update_timeout_us,
+ bond_mode_8023ad_periodic_cb, arg);
+}
+
+/**
+ * Initialise mode 4 state of a slave that is about to become active.
+ *
+ * Resets actor and partner parameters and the state machine flags to their
+ * defaults, makes the port its own aggregator and enables promiscuous mode on
+ * it. If the port has no mbuf pool yet, the LACP mbuf pool and the RX/TX
+ * rings are created as well; an existing pool is kept and the function
+ * returns early. Panics if any of these allocations fails.
+ *
+ * @param bond_dev	Bonded device.
+ * @param slave_id	Port id of the slave; must not be in the active list.
+ */
+void
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+
+	struct port *port = &mode_8023ad_ports[slave_id];
+	struct port_params initial = {
+			.system = { { 0 } },
+			.system_priority = rte_cpu_to_be_16(0xFFFF),
+			.key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
+			.port_priority = rte_cpu_to_be_16(0x00FF),
+			.port_number = 0,
+	};
+
+	char mem_name[RTE_ETH_NAME_MAX_LEN];
+	/* Kept signed: the socket id is passed on as-is to the pool and ring
+	 * constructors. Storing it in a uint8_t would turn a negative
+	 * "any socket" value into 255 and make those allocations fail. */
+	int socket_id;
+	unsigned element_size;
+
+	/* Given slave must not be in active list */
+	RTE_VERIFY(find_slave_by_id(internals->active_slaves,
+	internals->active_slave_count, slave_id) == internals->active_slave_count);
+
+	memcpy(&port->actor, &initial, sizeof(struct port_params));
+	/* Standard requires that port ID must be greater than 0.
+	 * Add 1 to get corresponding port_number */
+	port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1);
+
+	memcpy(&port->partner, &initial, sizeof(struct port_params));
+
+	/* default states */
+	port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
+	port->partner_state = STATE_LACP_ACTIVE;
+	port->sm_flags = SM_FLAGS_BEGIN;
+
+	/* use this port as aggregator */
+	port->aggregator_port_id = slave_id;
+	rte_eth_promiscuous_enable(slave_id);
+
+	timer_cancel(&port->warning_timer);
+
+	/* Pool and rings already exist from an earlier activation */
+	if (port->mbuf_pool != NULL)
+		return;
+
+	RTE_VERIFY(port->rx_ring == NULL);
+	RTE_VERIFY(port->tx_ring == NULL);
+	socket_id = rte_eth_devices[slave_id].pci_dev->numa_node;
+
+	element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf)
+				+ RTE_PKTMBUF_HEADROOM;
+
+	/* How big memory pool should be? If driver will not
+	 * free packets quick enough there will be ENOMEM in tx_machine.
+	 * For now give 511 pkts * max number of queued TX packets per slave.
+	 * Hope it will be enough. */
+	snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
+	port->mbuf_pool = rte_mempool_create(mem_name,
+		BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1,
+		element_size,
+		RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
+		sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
+		NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD);
+
+	/* Any memory allocation failure in initialization is critical because
+	 * resources can't be freed, so reinitialization is impossible. */
+	if (port->mbuf_pool == NULL) {
+		rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
+			slave_id, mem_name, rte_strerror(rte_errno));
+	}
+
+	snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
+	port->rx_ring = rte_ring_create(mem_name,
+			rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
+
+	if (port->rx_ring == NULL) {
+		rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
+			mem_name, rte_strerror(rte_errno));
+	}
+
+	/* TX ring is at least one pkt longer to make room for marker packet. */
+	snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
+	port->tx_ring = rte_ring_create(mem_name,
+			rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
+
+	if (port->tx_ring == NULL) {
+		rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
+			mem_name, rte_strerror(rte_errno));
+	}
+}
+
+/**
+ * Remove a slave from mode 4 processing.
+ *
+ * Every active port that used the slave as its aggregator is unselected and
+ * reverts to being its own aggregator. The slave itself is unselected, its
+ * SYNCHRONIZATION, COLLECTING and DISTRIBUTING actor state bits are cleared
+ * and all packets still queued in its RX and TX rings are freed.
+ *
+ * @param bond_dev	Bonded device.
+ * @param slave_id	Port id of the slave; must be in the active list.
+ *
+ * @return
+ *   Always 0.
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
+		uint8_t slave_id)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct port *removed = &mode_8023ad_ports[slave_id];
+	void *queued = NULL;
+	uint8_t idx;
+
+	/* Given slave must be in active list */
+	RTE_VERIFY(find_slave_by_id(internals->active_slaves,
+	internals->active_slave_count, slave_id) < internals->active_slave_count);
+
+	/* Exclude slave from transmit policy. If this slave is an aggregator
+	 * make all aggregated slaves unselected to force selection logic
+	 * to select suitable aggregator for this port. */
+	for (idx = 0; idx < internals->active_slave_count; idx++) {
+		const uint8_t member_id = internals->active_slaves[idx];
+		struct port *member = &mode_8023ad_ports[member_id];
+
+		if (member->aggregator_port_id == slave_id) {
+			member->selected = UNSELECTED;
+			/* Use default aggregator */
+			member->aggregator_port_id = member_id;
+		}
+	}
+
+	removed->selected = UNSELECTED;
+	removed->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+			STATE_COLLECTING);
+
+	/* Drain both rings so no mbuf stays referenced by this slave */
+	while (rte_ring_dequeue(removed->rx_ring, &queued) == 0)
+		rte_pktmbuf_free((struct rte_mbuf *)queued);
+
+	while (rte_ring_dequeue(removed->tx_ring, &queued) == 0)
+		rte_pktmbuf_free((struct rte_mbuf *)queued);
+
+	return 0;
+}
+
+/**
+ * Synchronise the actor system address of every active slave with the MAC
+ * address currently reported by the slave port.
+ *
+ * The periodic callback is cancelled while the update runs and is started
+ * again afterwards only if the bonded device is started. When the address of
+ * an aggregator changed, NTT is set on every active port using it.
+ *
+ * @param bond_dev	Bonded device.
+ */
+void
+bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	uint8_t outer, inner;
+
+	bond_mode_8023ad_stop(bond_dev);
+
+	for (outer = 0; outer < internals->active_slave_count; outer++) {
+		const uint8_t cur_id = internals->active_slaves[outer];
+		struct port *cur = &mode_8023ad_ports[cur_id];
+		struct ether_addr hw_addr;
+
+		rte_eth_macaddr_get(cur_id, &hw_addr);
+
+		/* Address unchanged, nothing to do for this port */
+		if (is_same_ether_addr(&hw_addr, &cur->actor.system))
+			continue;
+
+		ether_addr_copy(&hw_addr, &cur->actor.system);
+
+		/* Only when this port is an aggregator, set NTT flag on every
+		 * port that uses it. */
+		if (cur->aggregator_port_id == cur_id) {
+			for (inner = 0; inner < internals->active_slave_count; inner++) {
+				struct port *member =
+					&mode_8023ad_ports[internals->active_slaves[inner]];
+
+				if (member->aggregator_port_id == cur_id)
+					SM_FLAG_SET(member, NTT);
+			}
+		}
+	}
+
+	if (bond_dev->data->dev_started)
+		bond_mode_8023ad_start(bond_dev);
+}
+
+/**
+ * Read back the current mode 4 configuration of a bonded device.
+ *
+ * Tick based timeouts are converted to milliseconds using the TSC frequency;
+ * update_timeout_us is converted from microseconds. Every field of *conf* is
+ * written, so the result can be modified and handed to the setup function.
+ *
+ * @param dev	Bonded device.
+ * @param conf	Output configuration; must not be NULL.
+ */
+void
+bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
+		struct rte_eth_bond_8023ad_conf *conf)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct mode8023ad_private *mode4 = &internals->mode4;
+	uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
+
+	conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
+	conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
+	conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
+	conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
+	conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
+	conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
+	/* Previously left unset, which handed an indeterminate value back to
+	 * the caller */
+	conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
+	conf->update_timeout_ms = mode4->update_timeout_us / 1000;
+}
+
+/**
+ * Apply a mode 4 configuration to a bonded device.
+ *
+ * Millisecond values are converted to TSC ticks, except update_timeout_ms
+ * which is stored in microseconds. No validation is done here.
+ *
+ * @param dev	Bonded device.
+ * @param conf	Configuration to apply, or NULL to apply the built-in defaults.
+ */
+void
+bond_mode_8023ad_setup(struct rte_eth_dev *dev,
+		struct rte_eth_bond_8023ad_conf *conf)
+{
+	struct rte_eth_bond_8023ad_conf def_conf;
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct mode8023ad_private *mode4 = &internals->mode4;
+	uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
+
+	if (conf == NULL) {
+		conf = &def_conf;
+		conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
+		conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
+		conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
+		conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
+		conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
+		conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
+		conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
+		conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
+	}
+
+	mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
+	mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
+	mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
+	mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
+	mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
+	mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
+	mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
+	/* Widen before multiplying: the field is 32 bit, so the product would
+	 * otherwise wrap before being stored in the 64 bit destination */
+	mode4->update_timeout_us = (uint64_t)conf->update_timeout_ms * 1000;
+}
+
+/**
+ * Enable mode 4 on a bonded device by initialising the mode 4 state of every
+ * slave listed in its active slave array.
+ *
+ * NOTE(review): bond_mode_8023ad_activate_slave() verifies that the slave is
+ * not in the active list, so this looks intended for use while the list is
+ * empty - confirm against the callers.
+ *
+ * @param bond_dev	Bonded device.
+ *
+ * @return
+ *   Always 0.
+ */
+int
+bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	uint8_t i;
+
+	/* active_slaves[] holds the port ids; the loop index is only a position
+	 * in that array and must not be used as a port id. */
+	for (i = 0; i < internals->active_slave_count; i++)
+		bond_mode_8023ad_activate_slave(bond_dev,
+				internals->active_slaves[i]);
+
+	return 0;
+}
+
+/**
+ * Schedule the first run of the mode 4 periodic callback for a bonded device,
+ * using the compile-time default update period.
+ *
+ * @param bond_dev	Bonded device, passed to the callback as its argument.
+ *
+ * @return
+ *   Value returned by rte_eal_alarm_set().
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
+{
+	const uint64_t first_run_us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
+
+	return rte_eal_alarm_set(first_run_us, &bond_mode_8023ad_periodic_cb,
+			bond_dev);
+}
+
+/**
+ * Cancel the pending mode 4 periodic callback alarm registered for the given
+ * bonded device.
+ *
+ * @param bond_dev Bonded device whose alarm is cancelled.
+ */
+void
+bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
+{
+ rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
+}
+
+/**
+ * Dispatch a received slow protocol frame.
+ *
+ * Marker information frames are turned into marker responses in place and
+ * queued on the slave's tx_ring, at most one per rx_marker_timeout interval.
+ * LACP frames are queued on the slave's rx_ring for the state machines.
+ * Any other frame, or a frame that cannot be queued, is freed and a warning
+ * flag is recorded on the port. The function takes ownership of *pkt* in
+ * every case.
+ *
+ * @param internals Bonded device private data.
+ * @param slave_id Port id of the slave the frame was received on.
+ * @param pkt Received frame.
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+ uint8_t slave_id, struct rte_mbuf *pkt)
+{
+ struct mode8023ad_private *mode4 = &internals->mode4;
+ struct port *port = &mode_8023ad_ports[slave_id];
+ struct marker_header *m_hdr;
+ uint64_t marker_timer, old_marker_timer;
+ int retval;
+ uint8_t wrn, subtype;
+ /* If packet is a marker, we send response now by reusing given packet
+ * and update only source MAC, destination MAC is multicast so don't
+ * update it. Other frames will be handled later by state machines */
+ subtype = rte_pktmbuf_mtod(pkt,
+ struct slow_protocol_frame *)->slow_protocol.subtype;
+
+ if (subtype == SLOW_SUBTYPE_MARKER) {
+ m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
+
+ /* Only marker information frames are answered.
+ * NOTE(review): the branch hint marks this rejection path as the
+ * likely one - confirm that is intended. */
+ if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
+ wrn = WRN_UNKNOWN_MARKER_TYPE;
+ goto free_out;
+ }
+
+ /* Setup marker timer. Do it in loop in case of concurrent access.
+ * The compare-and-set only succeeds if the timer still holds the
+ * value that was checked for expiry. */
+ do {
+ old_marker_timer = port->rx_marker_timer;
+ if (!timer_is_expired(&old_marker_timer)) {
+ wrn = WRN_RX_MARKER_TO_FAST;
+ goto free_out;
+ }
+
+ timer_set(&marker_timer, mode4->rx_marker_timeout);
+ retval = rte_atomic64_cmpset(&port->rx_marker_timer,
+ old_marker_timer, marker_timer);
+ } while (unlikely(retval == 0));
+
+ /* Turn the request into a response sent from this slave */
+ m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
+ rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
+
+ if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
+ /* reset timer */
+ port->rx_marker_timer = 0;
+ wrn = WRN_TX_QUEUE_FULL;
+ goto free_out;
+ }
+ } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
+ if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
+ /* If RX ring is full, free lacpdu message and drop packet */
+ wrn = WRN_RX_QUEUE_FULL;
+ goto free_out;
+ }
+ } else {
+ wrn = WRN_UNKNOWN_SLOW_TYPE;
+ goto free_out;
+ }
+
+ /* Packet was queued; the ring consumer is responsible for it now */
+ return;
+
+free_out:
+ set_warning_flags(port, wrn);
+ rte_pktmbuf_free(pkt);
+}
+
+/*
+ * Public wrapper: validates the arguments, then reads the mode 4
+ * configuration of the bonded device into *conf*.
+ * Returns 0 on success, -EINVAL for an invalid bonded port id or NULL conf.
+ */
+int
+rte_eth_bond_8023ad_conf_get(uint8_t port_id,
+		struct rte_eth_bond_8023ad_conf *conf)
+{
+	if (valid_bonded_port_id(port_id) != 0 || conf == NULL)
+		return -EINVAL;
+
+	bond_mode_8023ad_conf_get(&rte_eth_devices[port_id], conf);
+	return 0;
+}
+
+/*
+ * Public wrapper: validates the port id and, when a configuration is given,
+ * runs basic sanity checks on it before applying it to the bonded device.
+ * A NULL conf applies the default configuration.
+ * Returns 0 on success, -EINVAL for an invalid bonded port id or an invalid
+ * configuration.
+ */
+int
+rte_eth_bond_8023ad_setup(uint8_t port_id,
+		struct rte_eth_bond_8023ad_conf *conf)
+{
+	if (valid_bonded_port_id(port_id) != 0)
+		return -EINVAL;
+
+	if (conf != NULL) {
+		/* Basic sanity check */
+		const int periodic_ok = conf->slow_periodic_ms != 0 &&
+			conf->fast_periodic_ms < conf->slow_periodic_ms;
+		const int timeouts_ok = conf->long_timeout_ms != 0 &&
+			conf->short_timeout_ms < conf->long_timeout_ms;
+		const int periods_ok = conf->aggregate_wait_timeout_ms != 0 &&
+			conf->tx_period_ms != 0 &&
+			conf->rx_marker_period_ms != 0 &&
+			conf->update_timeout_ms != 0;
+
+		if (!(periodic_ok && timeouts_ok && periods_ok)) {
+			RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
+			return -EINVAL;
+		}
+	}
+
+	bond_mode_8023ad_setup(&rte_eth_devices[port_id], conf);
+
+	return 0;
+}
+
+/*
+ * Public wrapper: copies the current mode 4 state of an active slave into
+ * *info*.
+ * Returns 0 on success, -EINVAL when info is NULL, the port is not a valid
+ * bonded port in 802.3ad mode, or the slave is not in the active slave list.
+ */
+int
+rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
+		struct rte_eth_bond_8023ad_slave_info *info)
+{
+	struct bond_dev_private *priv;
+	struct port *src;
+
+	if (info == NULL)
+		return -EINVAL;
+	if (valid_bonded_port_id(port_id) != 0)
+		return -EINVAL;
+	if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
+		return -EINVAL;
+
+	priv = rte_eth_devices[port_id].data->dev_private;
+
+	/* find_slave_by_id() yields the slave count when the id is not found */
+	if (find_slave_by_id(priv->active_slaves, priv->active_slave_count,
+			slave_id) == priv->active_slave_count)
+		return -EINVAL;
+
+	src = &mode_8023ad_ports[slave_id];
+
+	info->selected = src->selected;
+	info->agg_port_id = src->aggregator_port_id;
+
+	info->actor_state = src->actor_state;
+	rte_memcpy(&info->actor, &src->actor, sizeof(src->actor));
+
+	info->partner_state = src->partner_state;
+	rte_memcpy(&info->partner, &src->partner, sizeof(src->partner));
+
+	return 0;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_8023AD_H_
+#define RTE_ETH_BOND_8023AD_H_
+
+#include <rte_ether.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Actor/partner states. Bit flags stored in the actor_state and
+ * partner_state bytes of a port and in the state field of a LACPDU.
+ */
+#define STATE_LACP_ACTIVE 0x01
+#define STATE_LACP_SHORT_TIMEOUT 0x02
+#define STATE_AGGREGATION 0x04
+#define STATE_SYNCHRONIZATION 0x08
+#define STATE_COLLECTING 0x10
+#define STATE_DISTRIBUTING 0x20
+/** Partners parameters are defaulted */
+#define STATE_DEFAULTED 0x40
+#define STATE_EXPIRED 0x80
+
+/** TLV type values written to the tlv_type fields of a LACPDU */
+#define TLV_TYPE_ACTOR_INFORMATION 0x01
+#define TLV_TYPE_PARTNER_INFORMATION 0x02
+#define TLV_TYPE_COLLECTOR_INFORMATION 0x03
+#define TLV_TYPE_TERMINATOR_INFORMATION 0x00
+
+/** Slow protocol subtype values (first byte after the Ethernet header) */
+#define SLOW_SUBTYPE_LACP 0x01
+#define SLOW_SUBTYPE_MARKER 0x02
+
+/** Marker TLV types: received marker information and the response to it */
+#define MARKER_TLV_TYPE_INFO 0x01
+#define MARKER_TLV_TYPE_RESP 0x02
+
+/** Selection state of a port, as stored in the port's *selected* field */
+enum rte_bond_8023ad_selection {
+ UNSELECTED,
+ STANDBY,
+ SELECTED
+};
+
+/**
+ * Generic slow protocol structure: only the subtype byte is interpreted,
+ * the remaining payload is opaque.
+ */
+struct slow_protocol {
+ uint8_t subtype;
+ uint8_t reserved_119[119];
+} __attribute__((__packed__));
+
+/**
+ * Generic slow protocol frame type structure: Ethernet header followed by
+ * the generic slow protocol payload.
+ */
+struct slow_protocol_frame {
+ struct ether_hdr eth_hdr;
+ struct slow_protocol slow_protocol;
+} __attribute__((__packed__));
+
+/**
+ * Port parameters of an actor or partner. The structure is copied verbatim
+ * into LACPDUs; the driver stores the 16 bit fields it sets in big endian.
+ */
+struct port_params {
+ uint16_t system_priority;
+ /**< System priority (unused in current implementation) */
+ struct ether_addr system;
+ /**< System ID - Slave MAC address, same as bonding MAC address */
+ uint16_t key;
+ /**< Speed information (implementation dependent) and duplex. */
+ uint16_t port_priority;
+ /**< Priority of this (unused in current implementation) */
+ uint16_t port_number;
+ /**< Port number. It corresponds to slave port id. */
+} __attribute__((__packed__));
+
+/** Actor or partner information TLV as carried inside a LACPDU */
+struct lacpdu_actor_partner_params {
+ uint8_t tlv_type_info; /* TLV_TYPE_ACTOR_INFORMATION or TLV_TYPE_PARTNER_INFORMATION */
+ uint8_t info_length; /* set by the driver to sizeof this structure */
+ struct port_params port_params;
+ uint8_t state; /* bitmask of STATE_* flags */
+ uint8_t reserved_3[3];
+} __attribute__((__packed__));
+
+/** LACPDU structure (5.4.2 in 802.1AX documentation). */
+struct lacpdu {
+ uint8_t subtype; /* SLOW_SUBTYPE_LACP */
+ uint8_t version_number;
+
+ /* Actor and partner information TLVs */
+ struct lacpdu_actor_partner_params actor;
+ struct lacpdu_actor_partner_params partner;
+
+ /* Collector information TLV */
+ uint8_t tlv_type_collector_info;
+ uint8_t collector_info_length;
+ uint16_t collector_max_delay;
+ uint8_t reserved_12[12];
+
+ /* Terminator TLV */
+ uint8_t tlv_type_terminator;
+ uint8_t terminator_length;
+ uint8_t reserved_50[50];
+} __attribute__((__packed__));
+
+/**
+ * LACPDU frame: Contains ethernet header and LACPDU. This is the layout the
+ * driver maps onto the start of the mbuf data of a LACP packet.
+ */
+struct lacpdu_header {
+ struct ether_hdr eth_hdr;
+ struct lacpdu lacpdu;
+} __attribute__((__packed__));
+
+/** Marker PDU: payload of a slow protocol frame with subtype SLOW_SUBTYPE_MARKER */
+struct marker {
+ uint8_t subtype;
+ uint8_t version_number;
+
+ /* Marker TLV */
+ uint8_t tlv_type_marker; /* MARKER_TLV_TYPE_INFO or MARKER_TLV_TYPE_RESP */
+ uint8_t info_length;
+ uint16_t requester_port;
+ struct ether_addr requester_system;
+ uint32_t requester_transaction_id;
+ uint8_t reserved_2[2];
+
+ /* Terminator TLV */
+ uint8_t tlv_type_terminator;
+ uint8_t terminator_length;
+ uint8_t reserved_90[90];
+} __attribute__((__packed__));
+
+/** Marker frame: Ethernet header followed by the marker PDU */
+struct marker_header {
+ struct ether_hdr eth_hdr;
+ struct marker marker;
+} __attribute__((__packed__));
+
+/**
+ * Mode 4 timing configuration. All values are in milliseconds. The setup
+ * function rejects a configuration in which slow_periodic_ms,
+ * long_timeout_ms, aggregate_wait_timeout_ms, tx_period_ms,
+ * rx_marker_period_ms or update_timeout_ms is zero, or in which a fast/short
+ * value is not smaller than its slow/long counterpart.
+ */
+struct rte_eth_bond_8023ad_conf {
+ uint32_t fast_periodic_ms;
+ uint32_t slow_periodic_ms;
+ uint32_t short_timeout_ms;
+ uint32_t long_timeout_ms;
+ uint32_t aggregate_wait_timeout_ms;
+ uint32_t tx_period_ms; /* minimum spacing of LACPDUs queued by the TX machine */
+ uint32_t rx_marker_period_ms; /* minimum spacing of answered marker frames */
+ uint32_t update_timeout_ms; /* period at which the periodic callback re-arms itself */
+};
+
+/** Snapshot of the mode 4 state of one slave port */
+struct rte_eth_bond_8023ad_slave_info {
+ enum rte_bond_8023ad_selection selected;
+ uint8_t actor_state; /* bitmask of STATE_* flags */
+ struct port_params actor;
+ uint8_t partner_state; /* bitmask of STATE_* flags */
+ struct port_params partner;
+ uint8_t agg_port_id; /* port id of the aggregator used by this slave */
+};
+
+/**
+ * @internal
+ *
+ * Function returns current configuration of 802.3AX mode.
+ *
+ * @param port_id Bonding device id
+ * @param conf Pointer to timeout structure.
+ *
+ * @return
+ * 0 - if ok
+ * -EINVAL if conf is NULL or port_id is not a valid bonding device id
+ */
+int
+rte_eth_bond_8023ad_conf_get(uint8_t port_id,
+ struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Function set new configuration of 802.3AX mode.
+ *
+ * @param port_id Bonding device id
+ * @param conf Configuration, if NULL set default configuration.
+ * @return
+ * 0 - if ok
+ * -EINVAL if configuration is invalid or port_id is not a valid bonding
+ * device id.
+ */
+int
+rte_eth_bond_8023ad_setup(uint8_t port_id,
+ struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Function returns current state of given slave device.
+ *
+ * @param port_id Bonding device id
+ * @param slave_id Port id of valid slave.
+ * @param conf buffer for the slave state
+ * @return
+ * 0 - if ok
+ * -EINVAL if conf is NULL, port_id is not a valid bonding device in
+ * 802.3AD mode, or slave id is invalid (not an active slave of given
+ * bonded device).
+ */
+int
+rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
+ struct rte_eth_bond_8023ad_slave_info *conf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_ETH_BOND_8023AD_H_ */
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_8023AD_PRIVATE_H_
+#define RTE_ETH_BOND_8023AD_PRIVATE_H_
+
+#include <stdint.h>
+
+#include <rte_ether.h>
+#include <rte_byteorder.h>
+#include <rte_atomic.h>
+
+#include "rte_eth_bond_8023ad.h"
+
+/** Default period of the mode 4 periodic callback, in milliseconds. */
+#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100
+/** Maximum number of packets to one slave queued in RX ring. */
+#define BOND_MODE_8023AX_SLAVE_RX_PKTS 3
+/** Maximum number of LACP packets from one slave queued in TX ring. */
+#define BOND_MODE_8023AX_SLAVE_TX_PKTS 1
+/**
+ * Timeouts definitions (5.4.4 in 802.1AX documentation).
+ */
+#define BOND_8023AD_FAST_PERIODIC_MS 900
+#define BOND_8023AD_SLOW_PERIODIC_MS 29000
+#define BOND_8023AD_SHORT_TIMEOUT_MS 3000
+#define BOND_8023AD_LONG_TIMEOUT_MS 90000
+#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000
+#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000
+#define BOND_8023AD_TX_MACHINE_PERIOD_MS 500
+#define BOND_8023AD_RX_MARKER_PERIOD_MS 2000
+
+/**
+ * Interval of showing warning message from state machines. All messages will
+ * be held (and gathered together) to prevent flooding.
+ * This is not part of the 802.1AX standard.
+ */
+#define BOND_8023AD_WARNINGS_PERIOD_MS 1000
+
+
+
+/**
+ * State machine flags (bits of the sm_flags field of a port)
+ */
+#define SM_FLAGS_BEGIN 0x0001
+#define SM_FLAGS_LACP_ENABLED 0x0002
+#define SM_FLAGS_ACTOR_CHURN 0x0004
+#define SM_FLAGS_PARTNER_CHURN 0x0008
+#define SM_FLAGS_MOVED 0x0100
+#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200
+#define SM_FLAGS_NTT 0x0400
+
+/**
+ * Components of the actor key. The duplex flag occupies bit 0; the speed
+ * code is shifted left by one before being combined with it.
+ */
+#define BOND_LINK_FULL_DUPLEX_KEY 0x01
+#define BOND_LINK_SPEED_KEY_10M 0x02
+#define BOND_LINK_SPEED_KEY_100M 0x04
+#define BOND_LINK_SPEED_KEY_1000M 0x08
+#define BOND_LINK_SPEED_KEY_10G 0x10
+#define BOND_LINK_SPEED_KEY_20G 0x11
+#define BOND_LINK_SPEED_KEY_40G 0x12
+
+/** Warning flags recorded on a port through set_warning_flags() */
+#define WRN_RX_MARKER_TO_FAST 0x01
+#define WRN_UNKNOWN_SLOW_TYPE 0x02
+#define WRN_UNKNOWN_MARKER_TYPE 0x04
+#define WRN_NOT_LACP_CAPABLE 0x08
+#define WRN_RX_QUEUE_FULL 0x10
+#define WRN_TX_QUEUE_FULL 0x20
+
+/* Generic bit flag helpers: test, set and clear flags _f in _variable */
+#define CHECK_FLAGS(_variable, _f) ((_variable) & (_f))
+#define SET_FLAGS(_variable, _f) ((_variable) |= (_f))
+#define CLEAR_FLAGS(_variable, _f) ((_variable) &= ~(_f))
+
+/* State machine flag helpers for port _p; _f is the flag name without the
+ * SM_FLAGS_ prefix. SM_FLAG() evaluates to 0 or 1. */
+#define SM_FLAG(_p, _f) (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f))
+#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
+#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
+
+/* Actor state helpers for port _p; _f is the state name without the STATE_
+ * prefix. ACTOR_STATE() evaluates to 0 or 1. */
+#define ACTOR_STATE(_p, _f) (!!CHECK_FLAGS((_p)->actor_state, STATE_ ## _f))
+#define ACTOR_STATE_SET(_p, _f) SET_FLAGS((_p)->actor_state, STATE_ ## _f)
+#define ACTOR_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->actor_state, STATE_ ## _f)
+
+/* Partner state helpers for port _p; same naming rule as ACTOR_STATE */
+#define PARTNER_STATE(_p, _f) (!!CHECK_FLAGS((_p)->partner_state, STATE_ ## _f))
+#define PARTNER_STATE_SET(_p, _f) SET_FLAGS((_p)->partner_state, STATE_ ## _f)
+#define PARTNER_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->partner_state, STATE_ ## _f)
+
+/** Variables associated with each port (5.4.7 in 802.1AX documentation). */
+struct port {
+ /**
+ * The operational values of the Actor's state parameters. Bitmask
+ * of port states.
+ */
+ uint8_t actor_state;
+
+ /** The operational Actor's port parameters */
+ struct port_params actor;
+
+ /**
+ * The operational value of the Actor's view of the current values of
+ * the Partner's state parameters. The Actor sets this variable either
+ * to the value received from the Partner in an LACPDU, or to the value
+ * of Partner_Admin_Port_State. Bitmask of port states.
+ */
+ uint8_t partner_state;
+
+ /** The operational Partner's port parameters */
+ struct port_params partner;
+
+ /* Additional port parameters not listed in documentation */
+ /** State machine flags */
+ uint16_t sm_flags;
+ /** Current selection state of the port */
+ enum rte_bond_8023ad_selection selected;
+
+ /* State machine timers, driven through the timer_set()/timer_cancel()/
+ * timer_is_expired()/timer_is_stopped() helpers. */
+ uint64_t current_while_timer;
+ uint64_t periodic_timer;
+ uint64_t wait_while_timer;
+ uint64_t tx_machine_timer;
+ uint64_t tx_marker_timer;
+ /* Aggregator parameters */
+ /** Used aggregator port ID */
+ uint16_t aggregator_port_id;
+
+ /** Memory pool from which LACP packets to transmit are allocated */
+ struct rte_mempool *mbuf_pool;
+
+ /** Ring of LACP packets from RX burst function */
+ struct rte_ring *rx_ring;
+
+ /** Ring of slow protocol packets (LACP and MARKERS) to TX burst function */
+ struct rte_ring *tx_ring;
+
+ /** Timer which is also used as mutex. If is 0 (not running) RX marker
+ * packet might be responded. Otherwise shall be dropped. It is zeroed in
+ * mode 4 callback function after expire. */
+ volatile uint64_t rx_marker_timer;
+
+ /** Timer cancelled when the slave is activated; presumably limits how
+ * often warnings are shown - confirm in show_warnings() */
+ uint64_t warning_timer;
+ /** Presumably the pending WRN_* flags recorded by set_warning_flags() -
+ * confirm against that helper */
+ volatile uint16_t warnings_to_show;
+};
+
+/*
+ * Timeout values used by mode 4 (802.1AX) of a bonded device.
+ * NOTE(review): only update_timeout_us carries a unit in its name; the unit
+ * of the other members is not visible in this header - confirm where they
+ * are filled in (bond_mode_8023ad_setup() is the declared setter).
+ */
+struct mode8023ad_private {
+ uint64_t fast_periodic_timeout;
+ uint64_t slow_periodic_timeout;
+ uint64_t short_timeout;
+ uint64_t long_timeout;
+ uint64_t aggregate_wait_timeout;
+ uint64_t tx_period_timeout;
+ uint64_t rx_marker_timeout;
+ uint64_t update_timeout_us;
+};
+
+/**
+ * @internal
+ * The pool of *port* structures. The size of the pool
+ * is configured at compile-time in the <rte_eth_bond_8023ad.c> file.
+ */
+extern struct port mode_8023ad_ports[];
+
+/* Forward declaration */
+struct bond_dev_private;
+
+/**
+ * @internal
+ *
+ * Get configuration of bonded interface.
+ *
+ *
+ * @param dev Bonded interface
+ * @param conf returned configuration
+ */
+void
+bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
+ struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Set mode 4 configuration of bonded interface.
+ *
+ * @pre Bonded interface must be stopped.
+ *
+ * @param dev Bonded interface
+ * @param conf new configuration. If NULL set default configuration.
+ */
+void
+bond_mode_8023ad_setup(struct rte_eth_dev *dev,
+ struct rte_eth_bond_8023ad_conf *conf);
+
+/**
+ * @internal
+ *
+ * Enables 802.1AX mode and all active slaves on bonded interface.
+ *
+ * @param dev Bonded interface
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_enable(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Disables 802.1AX mode of the bonded interface and slaves.
+ *
+ * @param dev Bonded interface
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int bond_mode_8023ad_disable(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Starts 802.1AX state machines management logic.
+ * @param dev Bonded interface
+ * @return
+ * 0 if the machines were started, 1 if the machines were already running,
+ * negative value otherwise.
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Stops 802.1AX state machines management logic.
+ * The function is declared void, so no status is reported to the caller.
+ * @param dev Bonded interface
+ */
+void
+bond_mode_8023ad_stop(struct rte_eth_dev *dev);
+
+/**
+ * @internal
+ *
+ * Passes the given slow packet to the state machines management logic.
+ * @param internals Bonded device private data.
+ * @param slave_id Slave port id.
+ * @param pkt Slow packet.
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+ uint8_t slave_id, struct rte_mbuf *pkt);
+
+/**
+ * @internal
+ *
+ * Activates the given slave for 802.1AX mode. Called by activate_slave()
+ * when the bonded device is in BONDING_MODE_8023AD.
+ * The function is declared void, so no status is reported to the caller.
+ *
+ * @param dev Bonded interface.
+ * @param port_id Slave port ID to be added
+ */
+void
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id);
+
+/**
+ * @internal
+ *
+ * Deinitializes and removes the given slave from 802.1AX mode.
+ *
+ * @param dev Bonded interface.
+ * @param slave_pos Position of slave in active_slaves array.
+ * NOTE(review): deactivate_slave() passes the slave's port id here, not
+ * an array position - confirm against the implementation which of the
+ * two is expected and align the parameter name/description.
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
+
+/**
+ * Updates state when MAC was changed on bonded device or one of its slaves.
+ * @param bond_dev Bonded device
+ */
+void
+bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev);
+
+#endif /* RTE_ETH_BOND_8023AD_H_ */
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_alb.h"
+
+/**
+ * XOR-fold a byte range into a single 8-bit value.
+ *
+ * @param hash_start First byte of the range.
+ * @param hash_size Number of bytes to fold; values <= 0 yield 0.
+ *
+ * @return
+ *   XOR of all bytes in the range.
+ */
+static inline uint8_t
+simple_hash(uint8_t *hash_start, int hash_size)
+{
+	const uint8_t *cursor = hash_start;
+	int remaining = hash_size;
+	uint8_t folded = 0;
+
+	while (remaining-- > 0)
+		folded ^= *cursor++;
+
+	return folded;
+}
+
+/**
+ * Pick the next active slave in round-robin order.
+ *
+ * Advances mode6.last_slave by one modulo active_slave_count and stores the
+ * new position. last_slave is a uint32_t, so ALB_NULL_INDEX (0xFFFFFFFF)
+ * wraps to position 0 on the first call after a reset.
+ *
+ * The caller must ensure active_slave_count is non-zero (it is the divisor).
+ *
+ * @param internals Bonding data.
+ *
+ * @return
+ *   Entry of active_slaves[] at the newly selected position.
+ */
+static uint8_t
+calculate_slave(struct bond_dev_private *internals)
+{
+	const uint8_t next_pos = (internals->mode6.last_slave + 1) %
+			internals->active_slave_count;
+
+	internals->mode6.last_slave = next_pos;
+
+	return internals->active_slaves[next_pos];
+}
+
+/**
+ * Initialise ALB (mode 6) state of a bonded device.
+ *
+ * Clears the client table, initialises the mode 6 spinlock, resets the
+ * round-robin position and the "need to transmit" flag, and creates the
+ * mempool for ARP update packets if it does not exist yet. An existing
+ * mempool is reused.
+ *
+ * @param bond_dev Pointer to bonding device.
+ *
+ * @return
+ *   0 on success, -1 if the ARP mempool could not be created. The failure is
+ *   logged and reported to the caller instead of aborting the application.
+ */
+int
+bond_mode_alb_enable(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct client_data *hash_table = internals->mode6.client_table;
+
+	uint16_t data_size;
+	char mem_name[RTE_ETH_NAME_MAX_LEN];
+	int socket_id = bond_dev->pci_dev->numa_node;
+
+	/* Fill hash table with initial values */
+	memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE);
+	rte_spinlock_init(&internals->mode6.lock);
+	internals->mode6.last_slave = ALB_NULL_INDEX;
+	internals->mode6.ntt = 0;
+
+	/* Initialize memory pool for ARP packets to send */
+	if (internals->mode6.mempool == NULL) {
+		/*
+		 * 256 is size of ETH header, ARP header and nested VLAN headers.
+		 * The value is chosen to be cache aligned.
+		 */
+		data_size = 256 + RTE_PKTMBUF_HEADROOM;
+		snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name);
+		internals->mode6.mempool = rte_pktmbuf_pool_create(mem_name,
+				512 * RTE_MAX_ETHPORTS,
+				RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
+						32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
+				0, data_size, socket_id);
+
+		if (internals->mode6.mempool == NULL) {
+			/*
+			 * Running out of memory for one bonded device must not
+			 * take the whole process down; let the caller decide.
+			 */
+			RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n",
+					bond_dev->data->name);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Inspect a received ARP packet and refresh the ALB client table.
+ *
+ * Packets that are not ARP replies are left untouched. For a reply, the
+ * table entry selected by hashing the sender IP is compared with the packet
+ * (IPs, sender MAC, VLAN headers). If the entry is unused or differs, it is
+ * rewritten, a slave is assigned through calculate_slave(), and the packet's
+ * target hardware address is replaced with that slave's MAC. mode6.ntt is
+ * set for every ARP reply, whether or not the entry changed.
+ *
+ * @param eth_h ETH header of received packet.
+ * @param offset Number of bytes between the end of the ETH header and the
+ *   ARP header (the VLAN headers).
+ * @param internals Bonding data.
+ */
+void
+bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
+		struct bond_dev_private *internals)
+{
+	struct arp_hdr *arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
+	struct client_data *entry;
+	uint8_t slot;
+	int needs_update;
+
+	/* Only ARP replies are analyzed */
+	if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY))
+		return;
+
+	slot = simple_hash((uint8_t *)&arp->arp_data.arp_sip,
+			sizeof(arp->arp_data.arp_sip));
+	entry = &internals->mode6.client_table[slot];
+
+	rte_spinlock_lock(&internals->mode6.lock);
+
+	/* Does the stored entry differ from what this reply tells us? */
+	needs_update = entry->in_use == 0 ||
+			entry->app_ip != arp->arp_data.arp_tip ||
+			entry->cli_ip != arp->arp_data.arp_sip ||
+			!is_same_ether_addr(&entry->cli_mac, &arp->arp_data.arp_sha) ||
+			entry->vlan_count != offset / sizeof(struct vlan_hdr) ||
+			memcmp(entry->vlan, eth_h + 1, offset) != 0;
+
+	if (needs_update) {
+		entry->in_use = 1;
+		entry->app_ip = arp->arp_data.arp_tip;
+		entry->cli_ip = arp->arp_data.arp_sip;
+		ether_addr_copy(&arp->arp_data.arp_sha, &entry->cli_mac);
+
+		/* Bind the client to the next slave and advertise its MAC */
+		entry->slave_idx = calculate_slave(internals);
+		rte_eth_macaddr_get(entry->slave_idx, &entry->app_mac);
+		ether_addr_copy(&entry->app_mac, &arp->arp_data.arp_tha);
+
+		memcpy(entry->vlan, eth_h + 1, offset);
+		entry->vlan_count = offset / sizeof(struct vlan_hdr);
+	}
+
+	internals->mode6.ntt = 1;
+	rte_spinlock_unlock(&internals->mode6.lock);
+}
+
+/**
+ * Choose the slave for an outgoing ARP packet and update the client table.
+ *
+ * - If the ARP sender hardware address is not the bonded device's MAC, the
+ *   sender address is replaced with the current primary port's MAC and the
+ *   primary port is returned.
+ * - For an ARP reply, the table entry selected by hashing the target IP is
+ *   used. If it already describes this connection it is refreshed (client
+ *   MAC unless broadcast), otherwise it is re-initialised and bound to a
+ *   slave through calculate_slave(). The sender hardware address is replaced
+ *   with the MAC of the entry's slave.
+ * - Any other ARP packet gets the primary port's MAC as sender address and
+ *   is sent on the primary port.
+ *
+ * The entry's slave is copied to a local while mode6.lock is still held, so
+ * the returned value cannot be affected by a concurrent table update.
+ *
+ * @param eth_h ETH header of transmitted packet.
+ * @param offset Number of bytes between the end of the ETH header and the
+ *   ARP header (the VLAN headers).
+ * @param internals Bonding data.
+ *
+ * @return
+ *   Port id of the slave on which the packet should be sent.
+ */
+uint8_t
+bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
+		struct bond_dev_private *internals)
+{
+	struct arp_hdr *arp;
+
+	struct client_data *hash_table = internals->mode6.client_table;
+	struct client_data *client_info;
+
+	uint8_t hash_index;
+	uint8_t slave_idx;
+
+	struct ether_addr bonding_mac;
+
+	arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
+
+	/*
+	 * Traffic with src MAC other than bonding should be sent on
+	 * current primary port.
+	 */
+	rte_eth_macaddr_get(internals->port_id, &bonding_mac);
+	if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) {
+		rte_eth_macaddr_get(internals->current_primary_port,
+				&arp->arp_data.arp_sha);
+		return internals->current_primary_port;
+	}
+
+	hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip,
+			sizeof(uint32_t));
+	client_info = &hash_table[hash_index];
+
+	rte_spinlock_lock(&internals->mode6.lock);
+	if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) {
+		if (client_info->in_use &&
+				client_info->app_ip == arp->arp_data.arp_sip &&
+				client_info->cli_ip == arp->arp_data.arp_tip) {
+			/* Entry is already assigned to this client */
+			if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) {
+				ether_addr_copy(&arp->arp_data.arp_tha,
+						&client_info->cli_mac);
+			}
+		} else {
+			/* Assign new slave to this client */
+			client_info->in_use = 1;
+			client_info->ntt = 0;
+			client_info->app_ip = arp->arp_data.arp_sip;
+			ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac);
+			client_info->cli_ip = arp->arp_data.arp_tip;
+			client_info->slave_idx = calculate_slave(internals);
+		}
+
+		/* Update src mac in ARP and remember the VLAN headers */
+		rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
+		ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
+		memcpy(client_info->vlan, eth_h + 1, offset);
+		client_info->vlan_count = offset / sizeof(struct vlan_hdr);
+
+		/* Read the shared entry before dropping the lock */
+		slave_idx = client_info->slave_idx;
+		rte_spinlock_unlock(&internals->mode6.lock);
+		return slave_idx;
+	}
+
+	/* If packet is not ARP Reply, send it on current primary port. */
+	rte_spinlock_unlock(&internals->mode6.lock);
+	rte_eth_macaddr_get(internals->current_primary_port,
+			&arp->arp_data.arp_sha);
+	return internals->current_primary_port;
+}
+
+/**
+ * Fill a packet buffer with an ARP reply built from a client table entry.
+ *
+ * Writes the Ethernet header (source = entry's application MAC, destination
+ * = client MAC), the stored VLAN headers (if any) and an ARP reply carrying
+ * the entry's MAC/IP pairs. The entry is read while mode6.lock is held.
+ *
+ * The caller must provide a packet whose data area is large enough for the
+ * Ethernet header, vlan_count VLAN headers and the ARP header; no length
+ * check is done here.
+ *
+ * @param client_info Data of client to which packet is sent.
+ * @param pkt Pointer to packet which is sent.
+ * @param internals Bonding data.
+ *
+ * @return
+ *   Value of the entry's slave_idx, i.e. the slave on which the packet
+ *   should be sent.
+ */
+uint8_t
+bond_mode_alb_arp_upd(struct client_data *client_info,
+		struct rte_mbuf *pkt, struct bond_dev_private *internals)
+{
+	struct ether_hdr *eth_h;
+	struct arp_hdr *arp_h;
+	uint8_t slave_idx;
+
+	rte_spinlock_lock(&internals->mode6.lock);
+	eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+	ether_addr_copy(&client_info->app_mac, &eth_h->s_addr);
+	ether_addr_copy(&client_info->cli_mac, &eth_h->d_addr);
+	if (client_info->vlan_count > 0)
+		eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+	else
+		eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP);
+
+	/* The ARP header starts right after the (optional) VLAN headers */
+	arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr)
+			+ client_info->vlan_count * sizeof(struct vlan_hdr));
+
+	memcpy(eth_h + 1, client_info->vlan,
+			client_info->vlan_count * sizeof(struct vlan_hdr));
+
+	ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha);
+	arp_h->arp_data.arp_sip = client_info->app_ip;
+	ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha);
+	arp_h->arp_data.arp_tip = client_info->cli_ip;
+
+	arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER);
+	arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+	arp_h->arp_hln = ETHER_ADDR_LEN;
+	arp_h->arp_pln = sizeof(uint32_t);
+	arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
+
+	/* Copy before unlocking so the result is not read from shared state */
+	slave_idx = client_info->slave_idx;
+	rte_spinlock_unlock(&internals->mode6.lock);
+
+	return slave_idx;
+}
+
+/**
+ * Redistribute all used client table entries over the active slaves.
+ *
+ * Does nothing when there is no active slave. Otherwise, under mode6.lock,
+ * the round-robin position is reset and every entry that is in use gets a
+ * slave from calculate_slave() together with that slave's MAC address;
+ * mode6.ntt is set once at least one entry was reassigned.
+ *
+ * @param bond_dev Pointer to bonded device struct.
+ */
+void
+bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct client_data *entry = internals->mode6.client_table;
+	struct client_data *const table_end = entry + ALB_HASH_TABLE_SIZE;
+
+	/* Without active slaves there is nothing to assign clients to */
+	if (internals->active_slave_count <= 0)
+		return;
+
+	rte_spinlock_lock(&internals->mode6.lock);
+	internals->mode6.last_slave = ALB_NULL_INDEX;
+
+	for (; entry != table_end; entry++) {
+		if (!entry->in_use)
+			continue;
+
+		entry->slave_idx = calculate_slave(internals);
+		rte_eth_macaddr_get(entry->slave_idx, &entry->app_mac);
+		internals->mode6.ntt = 1;
+	}
+
+	rte_spinlock_unlock(&internals->mode6.lock);
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_ALB_H_
+#define RTE_ETH_BOND_ALB_H_
+
+#include <rte_ether.h>
+#include <rte_arp.h>
+
+#define ALB_HASH_TABLE_SIZE 256
+#define ALB_NULL_INDEX 0xFFFFFFFF
+
+/** ARP data of a single client (one entry of the ALB client table). */
+struct client_data {
+ struct ether_addr app_mac;
+ /**< MAC address of application running DPDK */
+ uint32_t app_ip;
+ /**< IP address of application running DPDK */
+ struct ether_addr cli_mac;
+ /**< Client MAC address */
+ uint32_t cli_ip;
+ /**< Client IP address */
+
+ uint8_t slave_idx;
+ /**< Slave on which we connect with that client. The ALB code stores an
+ * active_slaves[] entry here and passes it to rte_eth_macaddr_get(), so
+ * the value is used as a port id. */
+ uint8_t in_use;
+ /**< Flag indicating if entry in client table is currently used */
+ uint8_t ntt;
+ /**< Flag indicating if we need to send update to this client on next tx */
+
+ struct vlan_hdr vlan[2];
+ /**< Content of vlan headers (room for two nested headers) */
+ uint8_t vlan_count;
+ /**< Number of nested vlan headers */
+};
+
+/** Private state of ALB mode (mode 6) kept per bonded device. */
+struct mode_alb_private {
+ struct client_data client_table[ALB_HASH_TABLE_SIZE];
+ /**< Hash table storing ARP data of every client connected */
+ struct rte_mempool *mempool;
+ /**< Mempool for creating ARP update packets */
+ uint8_t ntt;
+ /**< Flag indicating if we need to send update to any client on next tx */
+ uint32_t last_slave;
+ /**< Index of last used slave in client table; ALB_NULL_INDEX after a
+ * reset */
+ rte_spinlock_t lock;
+ /**< Lock taken by the ALB functions while they access the client table */
+};
+
+/**
+ * ALB mode initialization.
+ *
+ * @param bond_dev Pointer to bonding device.
+ *
+ * @return
+ * Error code - 0 on success.
+ */
+int
+bond_mode_alb_enable(struct rte_eth_dev *bond_dev);
+
+/**
+ * Function handles ARP packet reception. If received ARP request, it is
+ * forwarded to application without changes. If it is ARP reply, client table
+ * is updated.
+ *
+ * @param eth_h ETH header of received packet.
+ * @param offset Number of bytes between the end of the ETH header and
+ * the ARP header, i.e. the total size of the VLAN headers.
+ * @param internals Bonding data.
+ */
+void
+bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
+ struct bond_dev_private *internals);
+
+/**
+ * Function handles ARP packet transmission. It also decides on which slave
+ * to send that packet. If the ARP sender MAC is not the bonded device's MAC,
+ * or the packet is not an ARP Reply, it is sent on the current primary port.
+ * If it is an ARP Reply, it is sent on the slave stored in the client table
+ * for that connection. On Reply the function also updates data in the
+ * client table. The ARP sender MAC of the packet is rewritten to the MAC of
+ * the chosen port.
+ *
+ * @param eth_h ETH header of transmitted packet.
+ * @param offset Number of bytes between the end of the ETH header and
+ * the ARP header, i.e. the total size of the VLAN headers.
+ * @param internals Bonding data.
+ *
+ * @return
+ * Port id of the slave on which packet should be sent.
+ */
+uint8_t
+bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
+ struct bond_dev_private *internals);
+
+/**
+ * Function fills packet with ARP data from client_info.
+ *
+ * @param client_info Data of client to which packet is sent.
+ * @param pkt Pointer to packet which is sent.
+ * @param internals Bonding data.
+ *
+ * @return
+ * Value of client_info->slave_idx, i.e. the slave on which packet should
+ * be sent.
+ */
+uint8_t
+bond_mode_alb_arp_upd(struct client_data *client_info,
+ struct rte_mbuf *pkt, struct bond_dev_private *internals);
+
+/**
+ * Function updates slave indexes of active connections.
+ *
+ * @param bond_dev Pointer to bonded device struct.
+ */
+void
+bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev);
+
+#endif /* RTE_ETH_BOND_ALB_H_ */
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_tcp.h>
+
+#include "rte_eth_bond.h"
+#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad_private.h"
+
+#define DEFAULT_POLLING_INTERVAL_10_MS (10)
+
+/**
+ * Check whether an ethdev is a bonded device.
+ *
+ * A device is treated as bonded when the name of its PCI driver equals
+ * driver_name.
+ *
+ * @param eth_dev Device to check.
+ *
+ * @return
+ *   0 if the device is a bonded device, -1 otherwise (including devices that
+ *   have no driver or no driver name attached).
+ */
+int
+valid_bonded_ethdev(struct rte_eth_dev *eth_dev)
+{
+	/* Check valid pointers; a device without driver cannot be bonded */
+	if (eth_dev->driver == NULL ||
+			eth_dev->driver->pci_drv.name == NULL ||
+			driver_name == NULL)
+		return -1;
+
+	/* Full string comparison covers both length and content */
+	return strcmp(eth_dev->driver->pci_drv.name, driver_name) == 0 ? 0 : -1;
+}
+
+/**
+ * Check that a port id is below the number of ethdevs reported by
+ * rte_eth_dev_count().
+ *
+ * @param port_id Port id to check.
+ *
+ * @return
+ *   0 if the port id is in range, -1 (after logging an error) otherwise.
+ */
+int
+valid_port_id(uint8_t port_id)
+{
+	const int ethdev_count = rte_eth_dev_count();
+
+	if (port_id < ethdev_count)
+		return 0;
+
+	RTE_BOND_LOG(ERR, "Port Id %d is greater than rte_eth_dev_count %d",
+			port_id, ethdev_count);
+	return -1;
+}
+
+/**
+ * Check that a port id is in range and refers to a bonded device.
+ *
+ * @param port_id Port id to check.
+ *
+ * @return
+ *   0 if the port is a bonded device, -1 otherwise. An error is logged when
+ *   the port exists but is not bonded.
+ */
+int
+valid_bonded_port_id(uint8_t port_id)
+{
+	/* The id must be in range before rte_eth_devices[] is indexed */
+	if (valid_port_id(port_id) != 0)
+		return -1;
+
+	if (valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0)
+		return 0;
+
+	RTE_BOND_LOG(ERR, "Specified port Id %d is not a bonded eth_dev device",
+			port_id);
+	return -1;
+}
+
+/**
+ * Check that a port id is in range and does NOT refer to a bonded device,
+ * i.e. that it may be used as a slave.
+ *
+ * @param port_id Port id to check.
+ *
+ * @return
+ *   0 if the port can be a slave, -1 otherwise.
+ */
+int
+valid_slave_port_id(uint8_t port_id)
+{
+	/* The id must be in range before rte_eth_devices[] is indexed */
+	if (valid_port_id(port_id) != 0)
+		return -1;
+
+	/* valid_bonded_ethdev() returns 0 for bonded devices: reject those */
+	return valid_bonded_ethdev(&rte_eth_devices[port_id]) != 0 ? 0 : -1;
+}
+
+/**
+ * Append a slave to the active slave list of a bonded device and run the
+ * mode specific activation (802.3ad, TLB, ALB).
+ *
+ * The capacity check is done before any array is written, so a full list is
+ * reported by RTE_VERIFY() without an out-of-bounds store happening first.
+ *
+ * @param eth_dev Bonded device.
+ * @param port_id Port id of the slave to activate.
+ */
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+	struct bond_dev_private *internals = eth_dev->data->dev_private;
+	uint8_t active_count = internals->active_slave_count;
+
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_activate_slave(eth_dev, port_id);
+
+	/* There must be room for one more entry before anything is stored */
+	RTE_VERIFY(internals->active_slave_count <
+			(RTE_DIM(internals->active_slaves) - 1));
+
+	if (internals->mode == BONDING_MODE_TLB
+			|| internals->mode == BONDING_MODE_ALB) {
+		/*
+		 * NOTE(review): assumes tlb_slaves_order[] is at least as large
+		 * as active_slaves[] - confirm in rte_eth_bond_private.h.
+		 */
+		internals->tlb_slaves_order[active_count] = port_id;
+	}
+
+	internals->active_slaves[internals->active_slave_count] = port_id;
+	internals->active_slave_count++;
+
+	if (internals->mode == BONDING_MODE_TLB)
+		bond_tlb_activate_slave(internals);
+	if (internals->mode == BONDING_MODE_ALB)
+		bond_mode_alb_client_list_upd(eth_dev);
+}
+
+/**
+ * Remove a slave from the active slave list of a bonded device.
+ *
+ * The mode specific machinery is stopped first (802.3ad state machines, or
+ * TLB for TLB/ALB modes), the slave is taken out of active_slaves[] if it is
+ * present, and, when the bonded device is started, the mode specific
+ * machinery is started again.
+ *
+ * @param eth_dev Bonded device.
+ * @param port_id Port id of the slave to deactivate.
+ */
+void
+deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+	struct bond_dev_private *internals = eth_dev->data->dev_private;
+	uint8_t remaining = internals->active_slave_count;
+	uint8_t pos;
+
+	if (internals->mode == BONDING_MODE_8023AD) {
+		bond_mode_8023ad_stop(eth_dev);
+		bond_mode_8023ad_deactivate_slave(eth_dev, port_id);
+	} else if (internals->mode == BONDING_MODE_TLB
+			|| internals->mode == BONDING_MODE_ALB) {
+		bond_tlb_disable(internals);
+	}
+
+	pos = find_slave_by_id(internals->active_slaves, remaining, port_id);
+
+	/* Slave found in the list: close the gap left by its entry */
+	if (pos < remaining) {
+		--remaining;
+		memmove(&internals->active_slaves[pos],
+				&internals->active_slaves[pos + 1],
+				(remaining - pos) *
+					sizeof(internals->active_slaves[0]));
+	}
+
+	RTE_VERIFY(remaining < RTE_DIM(internals->active_slaves));
+	internals->active_slave_count = remaining;
+
+	if (!eth_dev->data->dev_started)
+		return;
+
+	/* Bonded device is running: bring the mode logic back up */
+	if (internals->mode == BONDING_MODE_8023AD) {
+		bond_mode_8023ad_start(eth_dev);
+	} else if (internals->mode == BONDING_MODE_TLB) {
+		bond_tlb_enable(internals);
+	} else if (internals->mode == BONDING_MODE_ALB) {
+		bond_tlb_enable(internals);
+		bond_mode_alb_client_list_upd(eth_dev);
+	}
+}
+
+/**
+ * Derive the number of sockets from the physical memory layout.
+ *
+ * Walks the memory segments until the first one without an address (or
+ * RTE_MAX_MEMSEG entries) and tracks the highest socket id seen.
+ *
+ * @return
+ *   Highest socket id found plus one.
+ */
+uint8_t
+number_of_sockets(void)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int max_socket = 0;
+	int i = 0;
+
+	while (i < RTE_MAX_MEMSEG && ms[i].addr != NULL) {
+		if (ms[i].socket_id > max_socket)
+			max_socket = ms[i].socket_id;
+		i++;
+	}
+
+	/* Number of sockets = maximum socket_id + 1 */
+	return max_socket + 1;
+}
+
+/* Driver name stored in pci_drv->name by rte_eth_bond_create() and compared
+ * against by valid_bonded_ethdev() to recognise bonded devices. */
+const char *driver_name = "Link Bonding PMD";
+
+/**
+ * Create a bonded ethdev.
+ *
+ * Allocates a dummy PCI device, an eth driver with a catch-all PCI id table
+ * and the private data on the given socket, reserves a virtual ethdev entry,
+ * initialises the private data to its defaults, applies the default mode 4
+ * configuration and finally sets the requested bonding mode.
+ *
+ * @param name Name of the new bonded device; must not be NULL.
+ * @param mode Bonding mode passed to bond_ethdev_mode_set().
+ * @param socket_id Socket to allocate the device data on; must be lower than
+ *   number_of_sockets().
+ *
+ * @return
+ *   Port id of the new bonded device on success, -1 on failure. On failure
+ *   all memory allocated by this function is freed.
+ */
+int
+rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	struct bond_dev_private *internals = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	struct eth_driver *eth_drv = NULL;
+	struct rte_pci_driver *pci_drv = NULL;
+	struct rte_pci_id *pci_id_table = NULL;
+	/* now do all data allocation - for eth_dev structure, dummy pci driver
+	 * and internal (private) data
+	 */
+
+	if (name == NULL) {
+		RTE_BOND_LOG(ERR, "Invalid name specified");
+		goto err;
+	}
+
+	if (socket_id >= number_of_sockets()) {
+		RTE_BOND_LOG(ERR,
+				"Invalid socket id specified to create bonded device on.");
+		goto err;
+	}
+
+	pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
+	if (pci_dev == NULL) {
+		RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket");
+		goto err;
+	}
+
+	eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id);
+	if (eth_drv == NULL) {
+		RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket");
+		goto err;
+	}
+
+	/* The PCI driver is embedded in the eth driver structure */
+	pci_drv = &eth_drv->pci_drv;
+
+	pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id);
+	if (pci_id_table == NULL) {
+		RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket");
+		goto err;
+	}
+	pci_id_table->device_id = PCI_ANY_ID;
+	pci_id_table->subsystem_device_id = PCI_ANY_ID;
+	pci_id_table->vendor_id = PCI_ANY_ID;
+	pci_id_table->subsystem_vendor_id = PCI_ANY_ID;
+
+	pci_drv->id_table = pci_id_table;
+	pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC;
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
+	if (internals == NULL) {
+		RTE_BOND_LOG(ERR, "Unable to malloc internals on socket");
+		goto err;
+	}
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (eth_dev == NULL) {
+		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
+		goto err;
+	}
+
+	pci_dev->numa_node = socket_id;
+	pci_drv->name = driver_name;
+
+	eth_dev->driver = eth_drv;
+	eth_dev->data->dev_private = internals;
+	eth_dev->data->nb_rx_queues = (uint16_t)1;
+	eth_dev->data->nb_tx_queues = (uint16_t)1;
+
+	TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+	eth_dev->data->dev_link.link_status = 0;
+
+	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
+			socket_id);
+	if (eth_dev->data->mac_addrs == NULL) {
+		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
+		goto err;
+	}
+
+	eth_dev->data->dev_started = 0;
+	eth_dev->data->promiscuous = 0;
+	eth_dev->data->scattered_rx = 0;
+	eth_dev->data->all_multicast = 0;
+
+	eth_dev->dev_ops = &default_dev_ops;
+	eth_dev->pci_dev = pci_dev;
+
+	rte_spinlock_init(&internals->lock);
+
+	internals->port_id = eth_dev->data->port_id;
+	internals->mode = BONDING_MODE_INVALID;
+	internals->current_primary_port = 0;
+	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
+	internals->xmit_hash = xmit_l2_hash;
+	internals->user_defined_mac = 0;
+	internals->link_props_set = 0;
+
+	internals->link_status_polling_enabled = 0;
+
+	internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS;
+	internals->link_down_delay_ms = 0;
+	internals->link_up_delay_ms = 0;
+
+	internals->slave_count = 0;
+	internals->active_slave_count = 0;
+	internals->rx_offload_capa = 0;
+	internals->tx_offload_capa = 0;
+
+	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
+	memset(internals->slaves, 0, sizeof(internals->slaves));
+
+	/* Set mode 4 default configuration */
+	bond_mode_8023ad_setup(eth_dev, NULL);
+	if (bond_ethdev_mode_set(eth_dev, mode)) {
+		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
+				eth_dev->data->port_id, mode);
+		goto err;
+	}
+
+	return eth_dev->data->port_id;
+
+err:
+	/*
+	 * Pointers that were never allocated are still NULL here, which
+	 * rte_free() accepts.
+	 */
+	if (eth_dev != NULL) {
+		rte_free(eth_dev->data->mac_addrs);
+		eth_dev->data->mac_addrs = NULL;
+		/*
+		 * NOTE(review): the ethdev entry reserved by
+		 * rte_eth_dev_allocate() is not released here and still points
+		 * at the driver/private data freed below - confirm whether the
+		 * ethdev API of this release offers a way to release the port.
+		 */
+	}
+	rte_free(pci_dev);
+	rte_free(pci_id_table);
+	rte_free(eth_drv);
+	rte_free(internals);
+
+	return -1;
+}
+
+/*
+ * Add a slave port to a bonded device without taking internals->lock;
+ * rte_eth_bond_slave_add() calls this with the lock held.
+ *
+ * The slave must be a valid, non-bonded port that is not already a slave of
+ * any bonded device. The first slave provides the bonded device's MAC
+ * (unless user defined), link properties, primary port and offload
+ * capabilities; later slaves must match the link properties (when set) and
+ * narrow the offload capabilities to the common subset.
+ *
+ * bonded_port_id is not validated here; the caller is expected to have done
+ * so.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+__eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+ struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
+ struct bond_dev_private *internals;
+ struct bond_dev_private *temp_internals;
+ struct rte_eth_link link_props;
+ struct rte_eth_dev_info dev_info;
+
+ int i, j;
+
+ if (valid_slave_port_id(slave_port_id) != 0)
+ return -1;
+
+ bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+ internals = bonded_eth_dev->data->dev_private;
+
+ /* Verify that new slave device is not already a slave of another
+ * bonded device */
+ for (i = rte_eth_dev_count()-1; i >= 0; i--) {
+ if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) {
+ temp_internals = rte_eth_devices[i].data->dev_private;
+
+ for (j = 0; j < temp_internals->slave_count; j++) {
+ /* Device already a slave of a bonded device */
+ if (temp_internals->slaves[j].port_id == slave_port_id) {
+ RTE_BOND_LOG(ERR, "Slave port %d is already a slave",
+ slave_port_id);
+ return -1;
+ }
+ }
+ }
+ }
+
+ slave_eth_dev = &rte_eth_devices[slave_port_id];
+
+ /* Add slave details to bonded device */
+ slave_add(internals, slave_eth_dev);
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ rte_eth_dev_info_get(slave_port_id, &dev_info);
+
+ /* slave_count is only incremented further down, so this is the first
+ * slave when the count is still zero */
+ if (internals->slave_count < 1) {
+ /* if MAC is not user defined then use MAC of first slave add to
+ * bonded device */
+ if (!internals->user_defined_mac)
+ mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs);
+
+ /* Inherit eth dev link properties from first slave */
+ link_properties_set(bonded_eth_dev,
+ &(slave_eth_dev->data->dev_link));
+
+ /* Make primary slave */
+ internals->primary_port = slave_port_id;
+
+ /* Take the first dev's offload capabilities */
+ internals->rx_offload_capa = dev_info.rx_offload_capa;
+ internals->tx_offload_capa = dev_info.tx_offload_capa;
+
+ } else {
+ /* Check slave link properties are supported if props are set,
+ * all slaves must be the same */
+ if (internals->link_props_set) {
+ if (link_properties_valid(&(bonded_eth_dev->data->dev_link),
+ &(slave_eth_dev->data->dev_link))) {
+ RTE_BOND_LOG(ERR,
+ "Slave port %d link speed/duplex not supported",
+ slave_port_id);
+ /* NOTE(review): slave_add() above has already run and
+ * is not undone on this path - confirm that is
+ * harmless while slave_count is not incremented */
+ return -1;
+ }
+ } else {
+ link_properties_set(bonded_eth_dev,
+ &(slave_eth_dev->data->dev_link));
+ }
+ /* Keep only the offloads every slave supports */
+ internals->rx_offload_capa &= dev_info.rx_offload_capa;
+ internals->tx_offload_capa &= dev_info.tx_offload_capa;
+ }
+
+ internals->slave_count++;
+
+ /* Update all slave devices MACs*/
+ mac_address_slaves_update(bonded_eth_dev);
+
+ if (bonded_eth_dev->data->dev_started) {
+ if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) {
+ RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d",
+ slave_port_id);
+ /* NOTE(review): slave_count was already incremented and
+ * the MACs updated; nothing is rolled back here, so the
+ * port stays in the slave list although -1 is returned -
+ * confirm whether cleanup is needed */
+ return -1;
+ }
+ }
+
+ /* Register link status change callback with bonded device pointer as
+ * argument*/
+ rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
+ bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id);
+
+ /* If bonded device is started then we can add the slave to our active
+ * slave array */
+ if (bonded_eth_dev->data->dev_started) {
+ rte_eth_link_get_nowait(slave_port_id, &link_props);
+
+ if (link_props.link_status == 1)
+ activate_slave(bonded_eth_dev, slave_port_id);
+ }
+ return 0;
+
+}
+
+/* Attach slave_port_id to the bonded device bonded_port_id.
+ *
+ * Validates the bonded port id, then runs the lock-free add helper while
+ * holding the bonded device's spinlock. Returns the helper's result, or -1
+ * when the bonded port id fails validation. */
+int
+rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+	struct bond_dev_private *priv;
+	int rc;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	/* Hold the device lock for the whole duration of the add */
+	rte_spinlock_lock(&priv->lock);
+	rc = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id);
+	rte_spinlock_unlock(&priv->lock);
+
+	return rc;
+}
+
+/* Detach slave_port_id from the bonded device bonded_port_id. This function
+ * does no locking itself; rte_eth_bond_slave_remove() calls it with the
+ * bonded device's spinlock held.
+ *
+ * Steps, in order: deactivate the slave if it is in the active list, locate
+ * it in the slave list, unregister the LSC callback, restore the slave's
+ * persisted MAC address, drop it from the slave list, re-elect the current
+ * primary port and reset link/MAC/offload state when nothing is left.
+ *
+ * Returns 0 on success, -1 if the slave port id fails validation or the port
+ * is not present in the bonded device's slave list. */
+static int
+__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+ struct rte_eth_dev *bonded_eth_dev;
+ struct bond_dev_private *internals;
+
+ int i, slave_idx;
+
+ if (valid_slave_port_id(slave_port_id) != 0)
+ return -1;
+
+ bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+ internals = bonded_eth_dev->data->dev_private;
+
+ /* first remove from active slave list */
+ slave_idx = find_slave_by_id(internals->active_slaves,
+ internals->active_slave_count, slave_port_id);
+
+ /* presumably find_slave_by_id() returns the count when the port is not
+ * found, so an index below the count means "is active" - confirm */
+ if (slave_idx < internals->active_slave_count)
+ deactivate_slave(bonded_eth_dev, slave_port_id);
+
+ slave_idx = -1;
+ /* now find in slave list */
+ for (i = 0; i < internals->slave_count; i++)
+ if (internals->slaves[i].port_id == slave_port_id) {
+ slave_idx = i;
+ break;
+ }
+
+ if (slave_idx < 0) {
+ RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d",
+ internals->slave_count);
+ return -1;
+ }
+
+ /* Un-register link status change callback with bonded device pointer as
+ * argument*/
+ rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
+ bond_ethdev_lsc_event_callback,
+ &rte_eth_devices[bonded_port_id].data->port_id);
+
+ /* Restore original MAC address of slave device */
+ mac_address_set(&rte_eth_devices[slave_port_id],
+ &(internals->slaves[slave_idx].persisted_mac_addr));
+
+ slave_remove(internals, &rte_eth_devices[slave_port_id]);
+
+ /* first slave in the active list will be the primary by default,
+ * otherwise use first device in list */
+ if (internals->current_primary_port == slave_port_id) {
+ if (internals->active_slave_count > 0)
+ internals->current_primary_port = internals->active_slaves[0];
+ else if (internals->slave_count > 0)
+ internals->current_primary_port = internals->slaves[0].port_id;
+ else
+ /* NOTE(review): this branch clears primary_port while the two
+ * branches above update current_primary_port - confirm that
+ * the asymmetry is intended */
+ internals->primary_port = 0;
+ }
+
+ if (internals->active_slave_count < 1) {
+ /* reset device link properties as no slaves are active */
+ link_properties_reset(&rte_eth_devices[bonded_port_id]);
+
+ /* if no slaves are any longer attached to bonded device and MAC is not
+ * user defined then clear MAC of bonded device as it will be reset
+ * when a new slave is added */
+ if (internals->slave_count < 1 && !internals->user_defined_mac)
+ memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0,
+ sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs)));
+ }
+ /* With no slaves left there are no offload capabilities to advertise */
+ if (internals->slave_count == 0) {
+ internals->rx_offload_capa = 0;
+ internals->tx_offload_capa = 0;
+ }
+ return 0;
+}
+
+/* Detach slave_port_id from the bonded device bonded_port_id.
+ *
+ * Validates the bonded port id, then runs the lock-free remove helper while
+ * holding the bonded device's spinlock. Returns the helper's result, or -1
+ * when the bonded port id fails validation. */
+int
+rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+	struct bond_dev_private *priv;
+	int rc;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	/* Hold the device lock for the whole duration of the removal */
+	rte_spinlock_lock(&priv->lock);
+	rc = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id);
+	rte_spinlock_unlock(&priv->lock);
+
+	return rc;
+}
+
+/* Set the bonding mode of bonded_port_id. Returns -1 when the port id fails
+ * validation, otherwise whatever bond_ethdev_mode_set() returns. */
+int
+rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
+{
+	struct rte_eth_dev *bonded_dev;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	bonded_dev = &rte_eth_devices[bonded_port_id];
+
+	return bond_ethdev_mode_set(bonded_dev, mode);
+}
+
+/* Return the bonding mode stored for bonded_port_id, or -1 when the port id
+ * fails validation. */
+int
+rte_eth_bond_mode_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0) {
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+		return priv->mode;
+	}
+
+	return -1;
+}
+
+/* Make slave_port_id the user-selected primary slave of bonded_port_id.
+ *
+ * Both port ids are validated (bonded first, then slave); on failure -1 is
+ * returned and nothing is modified. On success the choice is recorded as
+ * user defined, handed to bond_ethdev_primary_set(), and 0 is returned. */
+int
+rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0 ||
+			valid_slave_port_id(slave_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	/* Remember that the primary was picked explicitly by the user */
+	priv->user_defined_primary_port = 1;
+	priv->primary_port = slave_port_id;
+
+	bond_ethdev_primary_set(priv, slave_port_id);
+
+	return 0;
+}
+
+/* Return the current primary slave port of bonded_port_id, or -1 when the
+ * port id fails validation or the bonded device has no slaves. */
+int
+rte_eth_bond_primary_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	/* Without slaves there is no primary to report */
+	if (priv->slave_count >= 1)
+		return priv->current_primary_port;
+
+	return -1;
+}
+
+/* Copy the port ids of all slaves of bonded_port_id into slaves[].
+ *
+ * len is the capacity of slaves[]. Returns the number of slaves written, or
+ * -1 when the port id fails validation, slaves is NULL, or the slave count
+ * exceeds len. */
+int
+rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
+{
+	struct bond_dev_private *priv;
+	uint8_t idx = 0;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	if (slaves == NULL)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	/* Caller's array must be able to hold every slave */
+	if (priv->slave_count > len)
+		return -1;
+
+	while (idx < priv->slave_count) {
+		slaves[idx] = priv->slaves[idx].port_id;
+		idx++;
+	}
+
+	return priv->slave_count;
+}
+
+/* Copy the port ids of the active slaves of bonded_port_id into slaves[].
+ *
+ * len is the capacity of slaves[]. Returns the number of active slaves
+ * copied, or -1 when the port id fails validation, slaves is NULL, or the
+ * active slave count exceeds len. */
+int
+rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
+		uint8_t len)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0 || slaves == NULL)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	if (len < priv->active_slave_count)
+		return -1;
+
+	/* The copy length is active_slave_count bytes, as in the original */
+	memcpy(slaves, priv->active_slaves, priv->active_slave_count);
+
+	return priv->active_slave_count;
+}
+
+/* Assign a user-defined MAC address to the bonded device bonded_port_id.
+ *
+ * Returns -1 when the port id fails validation or the address cannot be set
+ * on the bonded device. Otherwise the address is flagged as user defined
+ * and, if slaves are attached, the result of mac_address_slaves_update() is
+ * returned; with no slaves the function returns 0. */
+int
+rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+		struct ether_addr *mac_addr)
+{
+	struct rte_eth_dev *bonded_dev;
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	bonded_dev = &rte_eth_devices[bonded_port_id];
+	priv = bonded_dev->data->dev_private;
+
+	if (mac_address_set(bonded_dev, mac_addr) != 0)
+		return -1;
+
+	priv->user_defined_mac = 1;
+
+	/* Nothing to propagate when no slave is attached */
+	if (priv->slave_count <= 0)
+		return 0;
+
+	return mac_address_slaves_update(bonded_dev);
+}
+
+/* Drop any user-defined MAC address on bonded_port_id and fall back to the
+ * persisted (original) MAC address of the primary slave.
+ *
+ * Returns -1 when the port id fails validation, when the primary port cannot
+ * be found in the slave list, or when the address cannot be applied to the
+ * bonded device. With slaves attached the result of
+ * mac_address_slaves_update() is returned; with no slaves the function only
+ * clears the user-defined flag and returns 0. */
+int
+rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
+{
+	struct rte_eth_dev *bonded_eth_dev;
+	struct bond_dev_private *internals;
+	int i, primary_idx = -1;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+	internals = bonded_eth_dev->data->dev_private;
+
+	internals->user_defined_mac = 0;
+
+	/* No need to update anything as no slaves present */
+	if (internals->slave_count < 1)
+		return 0;
+
+	/* primary_port holds a port id, whereas slaves[] is indexed by slave
+	 * position, so the primary's entry has to be searched for rather than
+	 * indexed directly with the port id */
+	for (i = 0; i < internals->slave_count; i++) {
+		if (internals->slaves[i].port_id == internals->primary_port) {
+			primary_idx = i;
+			break;
+		}
+	}
+
+	if (primary_idx < 0) {
+		RTE_BOND_LOG(ERR, "Primary port not found in slave list");
+		return -1;
+	}
+
+	/* Set MAC Address of Bonded Device */
+	if (mac_address_set(bonded_eth_dev,
+			&internals->slaves[primary_idx].persisted_mac_addr) != 0) {
+		RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device");
+		return -1;
+	}
+
+	/* Update all slave devices MAC addresses */
+	return mac_address_slaves_update(bonded_eth_dev);
+}
+
+/* Select the transmit hash policy used by bonded_port_id.
+ *
+ * policy must be one of the BALANCE_XMIT_POLICY_LAYER2/23/34 values; the
+ * matching xmit_l*_hash function is installed together with the policy id.
+ * Returns 0 on success, -1 when the port id fails validation or the policy
+ * is not recognised (in which case nothing is modified). */
+int
+rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	if (policy == BALANCE_XMIT_POLICY_LAYER2)
+		priv->xmit_hash = xmit_l2_hash;
+	else if (policy == BALANCE_XMIT_POLICY_LAYER23)
+		priv->xmit_hash = xmit_l23_hash;
+	else if (policy == BALANCE_XMIT_POLICY_LAYER34)
+		priv->xmit_hash = xmit_l34_hash;
+	else
+		return -1;
+
+	priv->balance_xmit_policy = policy;
+
+	return 0;
+}
+
+/* Return the transmit hash policy stored for bonded_port_id, or -1 when the
+ * port id fails validation. */
+int
+rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0) {
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+		return priv->balance_xmit_policy;
+	}
+
+	return -1;
+}
+
+/* Store internal_ms as the link status polling interval (in ms, per the
+ * field name) of bonded_port_id. Returns 0 on success, -1 when the port id
+ * fails validation. */
+int
+rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+	priv->link_status_polling_interval_ms = internal_ms;
+
+	return 0;
+}
+
+/* Return the link status polling interval stored for bonded_port_id, or -1
+ * when the port id fails validation. */
+int
+rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0) {
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+		return priv->link_status_polling_interval_ms;
+	}
+
+	return -1;
+}
+
+/* Store delay_ms as the link-down delay (in ms, per the field name) of
+ * bonded_port_id. Returns 0 on success, -1 when the port id fails
+ * validation. */
+int
+rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+	priv->link_down_delay_ms = delay_ms;
+
+	return 0;
+}
+
+/* Return the link-down delay stored for bonded_port_id, or -1 when the port
+ * id fails validation. */
+int
+rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0) {
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+		return priv->link_down_delay_ms;
+	}
+
+	return -1;
+}
+
+/* Store delay_ms as the link-up delay (in ms, per the field name) of
+ * bonded_port_id. Returns 0 on success, -1 when the port id fails
+ * validation. */
+int
+rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+	priv->link_up_delay_ms = delay_ms;
+
+	return 0;
+}
+
+/* Return the link-up delay stored for bonded_port_id, or -1 when the port id
+ * fails validation. */
+int
+rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0) {
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+		return priv->link_up_delay_ms;
+	}
+
+	return -1;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "rte_eth_bond.h"
+#include "rte_eth_bond_private.h"
+
+/* NULL-terminated list of the kvarg key names of the bonding PMD.
+ * NOTE(review): presumably handed to the kvargs parser as the set of
+ * accepted keys - the consumer is outside this view. */
+const char *pmd_bond_init_valid_arguments[] = {
+ PMD_BOND_SLAVE_PORT_KVARG,
+ PMD_BOND_PRIMARY_SLAVE_KVARG,
+ PMD_BOND_MODE_KVARG,
+ PMD_BOND_XMIT_POLICY_KVARG,
+ PMD_BOND_SOCKET_ID_KVARG,
+ PMD_BOND_MAC_ADDR_KVARG,
+
+ NULL
+};
+
+/* Look up the ethdev port whose PCI device address equals *pci_addr.
+ *
+ * Scans ports 0 .. rte_eth_dev_count()-1, skipping entries without a PCI
+ * device. Returns the first matching port id, or -1 when none matches. */
+static inline int
+find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr)
+{
+	unsigned port;
+
+	for (port = 0; port < rte_eth_dev_count(); port++) {
+		const struct rte_pci_addr *candidate;
+
+		if (rte_eth_devices[port].pci_dev == NULL)
+			continue;
+
+		candidate = &rte_eth_devices[port].pci_dev->addr;
+
+		/* Any differing component disqualifies this port */
+		if (pci_addr->bus != candidate->bus ||
+				pci_addr->devid != candidate->devid ||
+				pci_addr->domain != candidate->domain ||
+				pci_addr->function != candidate->function)
+			continue;
+
+		return port;
+	}
+
+	return -1;
+}
+
+/* Look up the ethdev port whose device data name equals name.
+ *
+ * Scans ports 0 .. rte_eth_dev_count()-1, skipping entries without device
+ * data. Returns the first matching port id, or -1 when none matches. */
+static inline int
+find_port_id_by_dev_name(const char *name)
+{
+	unsigned port;
+
+	for (port = 0; port < rte_eth_dev_count(); port++) {
+		if (rte_eth_devices[port].data != NULL &&
+				strcmp(rte_eth_devices[port].data->name, name) == 0)
+			return port;
+	}
+
+	return -1;
+}
+
+/**
+ * Parses a port identifier string to a port id by pci address, then by name,
+ * and finally as a decimal port id.
+ *
+ * @param port_str
+ *   String holding a PCI address, a device name or a decimal port number.
+ * @return
+ *   The port id in the range [0, RTE_MAX_ETHPORTS), or -1 when the string
+ *   cannot be resolved or the resulting id is out of range.
+ */
+static inline int
+parse_port_id(const char *port_str)
+{
+	struct rte_pci_addr dev_addr;
+	int port_id;
+
+	/* try parsing as pci address, physical devices */
+	if (eal_parse_pci_DomBDF(port_str, &dev_addr) == 0) {
+		port_id = find_port_id_by_pci_addr(&dev_addr);
+		if (port_id < 0)
+			return -1;
+	} else {
+		/* try parsing as device name, virtual devices */
+		port_id = find_port_id_by_dev_name(port_str);
+		if (port_id < 0) {
+			char *end;
+			long parsed;
+
+			errno = 0;
+
+			/* try parsing as port id; an empty string or trailing
+			 * characters are not a number */
+			parsed = strtol(port_str, &end, 10);
+			if (end == port_str || *end != 0 || errno != 0)
+				return -1;
+
+			/* Range-check while still a long so that narrowing to
+			 * int cannot wrap a huge value into the valid range;
+			 * out-of-range values are reported below */
+			if (parsed < 0 || parsed >= RTE_MAX_ETHPORTS)
+				port_id = -1;
+			else
+				port_id = (int)parsed;
+		}
+	}
+
+	/* Valid port ids are 0 .. RTE_MAX_ETHPORTS - 1 */
+	if (port_id < 0 || port_id >= RTE_MAX_ETHPORTS) {
+		RTE_BOND_LOG(ERR, "Slave port specified (%s) outside expected range",
+				port_str);
+		return -1;
+	}
+	return port_id;
+}
+
+/* kvargs handler for the slave port key: resolves value to a port id and
+ * appends it to the struct bond_ethdev_slave_ports passed via extra_args.
+ *
+ * Returns -1 when value or extra_args is NULL or the value cannot be
+ * resolved to a port id; returns 0 otherwise, including when key is not the
+ * slave port key (in which case nothing is stored).
+ *
+ * NOTE(review): the append does not check slave_count against the capacity
+ * of slaves[]; confirm the caller bounds the number of slave arguments. */
+int
+bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	struct bond_ethdev_slave_ports *ports = extra_args;
+	int port_id;
+
+	if (value == NULL || extra_args == NULL)
+		return -1;
+
+	/* Only the slave port key is handled here */
+	if (strcmp(key, PMD_BOND_SLAVE_PORT_KVARG) != 0)
+		return 0;
+
+	port_id = parse_port_id(value);
+	if (port_id < 0) {
+		RTE_BOND_LOG(ERR, "Invalid slave port value (%s) specified", value);
+		return -1;
+	}
+
+	ports->slaves[ports->slave_count++] = (uint8_t)port_id;
+
+	return 0;
+}
+
+/* kvargs handler for the bonding mode key: parses value as a decimal mode
+ * number and stores it in the uint8_t pointed to by extra_args.
+ *
+ * The value is validated as a long before it is narrowed, so out-of-range
+ * input such as "256" is rejected instead of wrapping to a valid mode, and
+ * *extra_args is only written when the mode is accepted.
+ *
+ * Returns 0 when the mode is one of the BONDING_MODE_* values, -1 when an
+ * argument is NULL, the string is not a plain decimal number, or the mode is
+ * not recognised. */
+int
+bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	uint8_t *mode;
+	char *endptr;
+	long parsed;
+
+	if (value == NULL || extra_args == NULL)
+		return -1;
+
+	mode = extra_args;
+
+	errno = 0;
+	parsed = strtol(value, &endptr, 10);
+	if (endptr == value || *endptr != 0 || errno != 0)
+		return -1;
+
+	/* validate mode value */
+	switch (parsed) {
+	case BONDING_MODE_ROUND_ROBIN:
+	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_BALANCE:
+	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_8023AD:
+	case BONDING_MODE_TLB:
+	case BONDING_MODE_ALB:
+		*mode = (uint8_t)parsed;
+		return 0;
+	default:
+		RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
+		return -1;
+	}
+}
+
+/* kvargs handler for the socket id key: parses value as a decimal socket id
+ * and stores it in the uint8_t pointed to by extra_args.
+ *
+ * The range check is done on the full parsed value, before narrowing, so
+ * input such as "256" or "-1" is rejected rather than wrapped into range.
+ *
+ * Returns 0 when the id lies in [0, number_of_sockets()), -1 when an
+ * argument is NULL, the string is not a plain decimal number, or the id is
+ * out of range. */
+int
+bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	long socket_id;
+	char *endptr;
+
+	if (value == NULL || extra_args == NULL)
+		return -1;
+
+	errno = 0;
+	socket_id = strtol(value, &endptr, 10);
+	if (endptr == value || *endptr != 0 || errno != 0)
+		return -1;
+
+	/* validate socket id value */
+	if (socket_id >= 0 && socket_id < number_of_sockets()) {
+		*(uint8_t *)extra_args = (uint8_t)socket_id;
+		return 0;
+	}
+	return -1;
+}
+
+/* kvargs handler for the primary slave key: resolves value with
+ * parse_port_id() and stores the port id in the uint8_t pointed to by
+ * extra_args. Returns 0 on success, -1 when an argument is NULL or the
+ * value cannot be resolved to a port id. */
+int
+bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	uint8_t *primary_out = extra_args;
+	int port_id;
+
+	if (value == NULL || extra_args == NULL)
+		return -1;
+
+	port_id = parse_port_id(value);
+	if (port_id >= 0) {
+		*primary_out = (uint8_t)port_id;
+		return 0;
+	}
+
+	return -1;
+}
+
+/* kvargs handler for the transmit policy key: maps the policy name in value
+ * to the matching BALANCE_XMIT_POLICY_* id and stores it in the uint8_t
+ * pointed to by extra_args. Returns 0 on success, -1 when an argument is
+ * NULL or the name is not recognised (the output is then left untouched). */
+int
+bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	uint8_t *policy_out = extra_args;
+
+	if (value == NULL || extra_args == NULL)
+		return -1;
+
+	if (strcmp(PMD_BOND_XMIT_POLICY_LAYER2_KVARG, value) == 0) {
+		*policy_out = BALANCE_XMIT_POLICY_LAYER2;
+		return 0;
+	}
+
+	if (strcmp(PMD_BOND_XMIT_POLICY_LAYER23_KVARG, value) == 0) {
+		*policy_out = BALANCE_XMIT_POLICY_LAYER23;
+		return 0;
+	}
+
+	if (strcmp(PMD_BOND_XMIT_POLICY_LAYER34_KVARG, value) == 0) {
+		*policy_out = BALANCE_XMIT_POLICY_LAYER34;
+		return 0;
+	}
+
+	return -1;
+}
+
+/* kvargs handler for the MAC address key: parses value into the
+ * struct ether_addr sized buffer pointed to by extra_args. Returns -1 when
+ * an argument is NULL, otherwise whatever cmdline_parse_etheraddr()
+ * returns. */
+int
+bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	int rc = -1;
+
+	/* Parse MAC */
+	if (value != NULL && extra_args != NULL)
+		rc = cmdline_parse_etheraddr(NULL, value, extra_args,
+				sizeof(struct ether_addr));
+
+	return rc;
+}
+
+/* kvargs handler for millisecond-valued keys: parses value as a decimal
+ * number and stores it in the uint32_t pointed to by extra_args.
+ *
+ * The value is parsed as a long long and range-checked before narrowing, so
+ * negative or oversized input is rejected instead of silently wrapping into
+ * an unrelated uint32_t value.
+ *
+ * Returns 0 on success, -1 when an argument is NULL, the string is not a
+ * plain decimal number, or the value does not fit in [0, UINT32_MAX]. */
+int
+bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	long long parsed;
+	char *endptr;
+
+	if (value == NULL || extra_args == NULL)
+		return -1;
+
+	errno = 0;
+	parsed = strtoll(value, &endptr, 10);
+	if (endptr == value || *endptr != 0 || errno != 0)
+		return -1;
+
+	if (parsed < 0 || parsed > UINT32_MAX)
+		return -1;
+
+	*(uint32_t *)extra_args = (uint32_t)parsed;
+
+	return 0;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <netinet/in.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_ip.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_dev.h>
+#include <rte_alarm.h>
+#include <rte_cycles.h>
+
+#include "rte_eth_bond.h"
+#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad_private.h"
+
+/* NOTE(review): a period in milliseconds, judging by the name; the code that
+ * uses it is outside this view */
+#define REORDER_PERIOD_MS 10
+
+/* XOR of the src_port and dst_port members of the header pointed to by h */
+#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
+
+/* Table for statistics in mode 5 TLB */
+static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
+
+/* Skip up to two stacked VLAN tags that follow the Ethernet header.
+ *
+ * *proto holds the (big-endian) ether type on entry; while it is the VLAN
+ * type it is replaced by the encapsulated protocol of the next tag, for at
+ * most two tags. Returns the number of bytes occupied by the skipped tags
+ * (0, 1x or 2x sizeof(struct vlan_hdr)). */
+static inline size_t
+get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
+{
+	const uint16_t vlan_type_be = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+	struct vlan_hdr *tag = (struct vlan_hdr *)(eth_hdr + 1);
+	size_t skipped = 0;
+	int depth;
+
+	for (depth = 0; depth < 2 && *proto == vlan_type_be; depth++) {
+		*proto = tag->eth_proto;
+		skipped += sizeof(struct vlan_hdr);
+		tag++;
+	}
+
+	return skipped;
+}
+
+/* Receive burst that polls every active slave in list order.
+ *
+ * queue is the bonded device's struct bond_rx_queue. Each slave is polled on
+ * the same queue id and its packets are appended to bufs until nb_pkts
+ * packets have been gathered or all active slaves were visited. Returns the
+ * total number of packets placed in bufs. */
+static uint16_t
+bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_rx_queue *rxq = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *priv = rxq->dev_private;
+	uint16_t total = 0;
+	int slave;
+
+	for (slave = 0; slave < priv->active_slave_count && nb_pkts; slave++) {
+		/* Append after what earlier slaves delivered; nb_pkts is the
+		 * room still left in bufs */
+		uint16_t got = rte_eth_rx_burst(priv->active_slaves[slave],
+				rxq->queue_id, bufs + total, nb_pkts);
+
+		total += got;
+		nb_pkts -= got;
+	}
+
+	return total;
+}
+
+/* Receive burst for active-backup mode: polls only the current primary
+ * port, on the bonded queue's queue id, and returns what that poll
+ * returns. */
+static uint16_t
+bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* queue is the bonded device's rx queue descriptor */
+	struct bond_rx_queue *rxq = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *priv = rxq->dev_private;
+
+	return rte_eth_rx_burst(priv->current_primary_port, rxq->queue_id,
+			bufs, nb_pkts);
+}
+
+/* Receive burst for 802.3ad (mode 4).
+ *
+ * Polls each active slave in turn and filters what it delivered: slow
+ * protocol frames are handed to bond_mode_8023ad_handle_slow_pkt(); frames
+ * from a slave that is not collecting, or (when the bonded device is not
+ * promiscuous) frames whose destination MAC is not the bonded MAC, are
+ * freed. The remaining packets are kept compacted at the front of bufs.
+ * Returns the number of packets left in bufs.
+ *
+ * The packet indices are uint16_t, like nb_pkts, so that bursts larger than
+ * 255 packets are indexed correctly. */
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* Cast to structure, containing bonded device's port id and queue id */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	struct ether_addr bond_mac;
+
+	struct ether_hdr *hdr;
+
+	/* Slow protocol ether type in network byte order, for direct
+	 * comparison with the header field */
+	const uint16_t ether_type_slow_be = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
+	uint16_t num_rx_total = 0;	/* Total number of received packets */
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	uint8_t slave_count;
+
+	uint8_t collecting;  /* current slave collecting status */
+	const uint8_t promisc = internals->promiscuous_en;
+	uint8_t i;
+	uint16_t j, k;
+
+	rte_eth_macaddr_get(internals->port_id, &bond_mac);
+	/* Copy slave list to protect against slave up/down changes during rx
+	 * bursting */
+	slave_count = internals->active_slave_count;
+	memcpy(slaves, internals->active_slaves,
+			sizeof(internals->active_slaves[0]) * slave_count);
+
+	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
+		j = num_rx_total;
+		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
+
+		/* Read packets from this slave */
+		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+				&bufs[num_rx_total], nb_pkts - num_rx_total);
+
+		for (k = j; k < 2 && k < num_rx_total; k++)
+			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
+
+		/* Handle slow protocol packets. */
+		while (j < num_rx_total) {
+			if (j + 3 < num_rx_total)
+				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
+
+			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+			/* Remove packet from array if it is a slow packet, or the
+			 * slave is not in collecting state, or the bonding
+			 * interface is not in promiscuous mode and the packet
+			 * address does not match. */
+			if (unlikely(hdr->ether_type == ether_type_slow_be ||
+				!collecting || (!promisc &&
+					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
+
+				if (hdr->ether_type == ether_type_slow_be) {
+					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
+						bufs[j]);
+				} else
+					rte_pktmbuf_free(bufs[j]);
+
+				/* Packet is managed by mode 4 or dropped, shift the array */
+				num_rx_total--;
+				if (j < num_rx_total) {
+					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
+						(num_rx_total - j));
+				}
+			} else
+				j++;
+		}
+	}
+
+	return num_rx_total;
+}
+
+#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
+/* Debug-only counters for the ALB debug build options. Their addresses are
+ * passed to mode6_debug() as the RX/TX indicator, and the MODE6_DEBUG log
+ * macro pre-increments the one it is given. */
+uint32_t burstnumberRX;
+uint32_t burstnumberTX;
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
+
+/* Write a human readable name for the ARP opcode arp_op into buf.
+ *
+ * buf must be large enough for the longest name plus its terminating NUL;
+ * unknown opcodes yield "Unknown". */
+static void
+arp_op_name(uint16_t arp_op, char *buf)
+{
+	const char *name;
+
+	switch (arp_op) {
+	case ARP_OP_REQUEST:
+		name = "ARP Request";
+		break;
+	case ARP_OP_REPLY:
+		name = "ARP Reply";
+		break;
+	case ARP_OP_REVREQUEST:
+		name = "Reverse ARP Request";
+		break;
+	case ARP_OP_REVREPLY:
+		name = "Reverse ARP Reply";
+		break;
+	case ARP_OP_INVREQUEST:
+		name = "Peer Identify Request";
+		break;
+	case ARP_OP_INVREPLY:
+		name = "Peer Identify Reply";
+		break;
+	default:
+		name = "Unknown";
+		break;
+	}
+
+	/* The bound is the text length plus the terminating NUL, i.e. the
+	 * size of the string literal itself */
+	snprintf(buf, strlen(name) + 1, "%s", name);
+}
+#endif
+/* Buffer size for a dotted-quad IPv4 string: "255.255.255.255" plus NUL */
+#define MaxIPv4String	16
+/* Format the big-endian IPv4 address be_ipv4_addr as dotted-quad text into
+ * buf, writing at most buf_size bytes including the terminating NUL.
+ *
+ * The octets are passed as unsigned int to match the %u conversions. */
+static void
+ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
+{
+	uint32_t ipv4_addr;
+
+	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
+	snprintf(buf, buf_size, "%u.%u.%u.%u",
+		(unsigned)((ipv4_addr >> 24) & 0xFF),
+		(unsigned)((ipv4_addr >> 16) & 0xFF),
+		(unsigned)((ipv4_addr >> 8) & 0xFF),
+		(unsigned)(ipv4_addr & 0xFF));
+}
+
+/* Capacity of the debug client statistics table */
+#define MAX_CLIENTS_NUMBER 128
+/* Number of entries of client_stats[] filled in so far; it is incremented by
+ * update_client_stats() each time a new client is inserted */
+uint8_t active_clients;
+/* Per-client packet counters kept by update_client_stats() */
+struct client_stats_t {
+ uint8_t port; /* port value recorded for the client */
+ uint32_t ipv4_addr; /* address value recorded for the client */
+ uint32_t ipv4_rx_packets; /* bumped when the RX indicator is passed */
+ uint32_t ipv4_tx_packets; /* bumped for any other indicator */
+};
+struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
+
+/* Account one packet for the client identified by (addr, port) in the debug
+ * statistics table.
+ *
+ * TXorRXindicator selects the counter: when it points at burstnumberRX the
+ * RX counter is incremented, otherwise the TX counter. A client that is not
+ * yet in the table is appended; once the table holds MAX_CLIENTS_NUMBER
+ * entries, packets from further new clients are not recorded. */
+static void
+update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
+{
+	int i = 0;
+
+	for (; i < MAX_CLIENTS_NUMBER; i++) {
+		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
+			/* Just update RX packets number for this client */
+			if (TXorRXindicator == &burstnumberRX)
+				client_stats[i].ipv4_rx_packets++;
+			else
+				client_stats[i].ipv4_tx_packets++;
+			return;
+		}
+	}
+
+	/* Table is full: appending would write past the end of client_stats */
+	if (active_clients >= MAX_CLIENTS_NUMBER)
+		return;
+
+	/* We have a new client. Insert him to the table, and increment stats */
+	if (TXorRXindicator == &burstnumberRX)
+		client_stats[active_clients].ipv4_rx_packets++;
+	else
+		client_stats[active_clients].ipv4_tx_packets++;
+	client_stats[active_clients].ipv4_addr = addr;
+	client_stats[active_clients].port = port;
+	active_clients++;
+
+}
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
+/* Emit one DEBUG log line made of: info, port, the source MAC of eth_h,
+ * src_ip, the destination MAC of eth_h, dst_ip, arp_op and the burst
+ * counter. Note that burstnumber is pre-incremented as a side effect, so it
+ * must be an lvalue. */
+#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
+ RTE_LOG(DEBUG, PMD, \
+ "%s " \
+ "port:%d " \
+ "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
+ "SrcIP:%s " \
+ "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
+ "DstIP:%s " \
+ "%s " \
+ "%d\n", \
+ info, \
+ port, \
+ eth_h->s_addr.addr_bytes[0], \
+ eth_h->s_addr.addr_bytes[1], \
+ eth_h->s_addr.addr_bytes[2], \
+ eth_h->s_addr.addr_bytes[3], \
+ eth_h->s_addr.addr_bytes[4], \
+ eth_h->s_addr.addr_bytes[5], \
+ src_ip, \
+ eth_h->d_addr.addr_bytes[0], \
+ eth_h->d_addr.addr_bytes[1], \
+ eth_h->d_addr.addr_bytes[2], \
+ eth_h->d_addr.addr_bytes[3], \
+ eth_h->d_addr.addr_bytes[4], \
+ eth_h->d_addr.addr_bytes[5], \
+ dst_ip, \
+ arp_op, \
+ ++burstnumber)
+#endif
+
+/* Debug helper for the ALB debug build options.
+ *
+ * For IPv4 frames the source address is formatted and the per-client
+ * statistics are updated through update_client_stats(), using burstnumber
+ * as the RX/TX indicator. When RTE_LIBRTE_BOND_DEBUG_ALB is defined, IPv4
+ * and ARP frames are additionally logged through MODE6_DEBUG (which
+ * increments *burstnumber). VLAN tags in front of the payload are skipped
+ * via get_vlan_offset(). */
+static void
+mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
+		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
+{
+	struct ipv4_hdr *ipv4_h;
+#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
+	struct arp_hdr *arp_h;
+	char dst_ip[16];
+	char ArpOp[24];
+	char buf[16];
+#endif
+	char src_ip[16];
+
+	uint16_t ether_type = eth_h->ether_type;
+	/* ether_type is updated in place to the protocol behind any VLAN tags */
+	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
+	snprintf(buf, 16, "%s", info);
+#endif
+
+	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
+		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
+#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
+		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
+		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
+#endif
+		update_client_stats(ipv4_h->src_addr, port, burstnumber);
+	}
+#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
+	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
+		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
+		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
+		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
+		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
+		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
+	}
+#endif
+}
+#endif
+
+/* Receive burst for ALB (mode 6).
+ *
+ * Receives through bond_ethdev_rx_burst() and then inspects every packet:
+ * ARP frames (after skipping VLAN tags) are passed to
+ * bond_mode_alb_arp_recv(). With the ALB debug options enabled, ARP and
+ * IPv4 frames are also reported through mode6_debug(). Returns the number
+ * of packets received. */
+static uint16_t
+bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	/* queue is an rx queue descriptor, as in the other rx burst handlers */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	struct ether_hdr *eth_h;
+	uint16_t ether_type, offset;
+	uint16_t nb_recv_pkts;
+	int i;
+
+	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
+
+	for (i = 0; i < nb_recv_pkts; i++) {
+		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
+		ether_type = eth_h->ether_type;
+		/* ether_type is updated in place to the protocol behind any
+		 * VLAN tags */
+		offset = get_vlan_offset(eth_h, &ether_type);
+
+		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
+#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
+			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
+#endif
+			bond_mode_alb_arp_recv(eth_h, offset, internals);
+		}
+#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
+		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
+#endif
+	}
+
+	return nb_recv_pkts;
+}
+
+/* Transmit burst for round-robin mode.
+ *
+ * Packets are dealt out one by one to the active slaves, starting at the
+ * slave index remembered from the previous call, and each slave's share is
+ * sent with a single rte_eth_tx_burst(). Packets a slave did not accept are
+ * copied to the tail of bufs. Returns the number of packets actually
+ * transmitted; 0 when there is no active slave. */
+static uint16_t
+bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
+ uint16_t nb_pkts)
+{
+ struct bond_dev_private *internals;
+ struct bond_tx_queue *bd_tx_q;
+
+ /* Per-slave staging arrays; a variable length array sized by nb_pkts */
+ struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+ uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+ uint8_t num_of_slaves;
+ uint8_t slaves[RTE_MAX_ETHPORTS];
+
+ uint16_t num_tx_total = 0, num_tx_slave;
+
+ /* NOTE(review): function-level static, so the rotation position is shared
+ * by every caller of this function rather than kept per queue/device */
+ static int slave_idx = 0;
+ int i, cslave_idx = 0, tx_fail_total = 0;
+
+ bd_tx_q = (struct bond_tx_queue *)queue;
+ internals = bd_tx_q->dev_private;
+
+ /* Copy slave list to protect against slave up/down changes during tx
+ * bursting */
+ num_of_slaves = internals->active_slave_count;
+ memcpy(slaves, internals->active_slaves,
+ sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+ if (num_of_slaves < 1)
+ return num_tx_total;
+
+ /* Populate slaves mbuf with which packets are to be sent on it */
+ for (i = 0; i < nb_pkts; i++) {
+ cslave_idx = (slave_idx + i) % num_of_slaves;
+ slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
+ }
+
+ /* increment current slave index so the next call to tx burst starts on the
+ * next slave */
+ slave_idx = ++cslave_idx;
+
+ /* Send packet burst on each slave device */
+ for (i = 0; i < num_of_slaves; i++) {
+ if (slave_nb_pkts[i] > 0) {
+ num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+ slave_bufs[i], slave_nb_pkts[i]);
+
+ /* if tx burst fails move packets to end of bufs */
+ if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+ int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
+
+ tx_fail_total += tx_fail_slave;
+
+ /* failed packets accumulate backwards from the end of bufs */
+ memcpy(&bufs[nb_pkts - tx_fail_total],
+ &slave_bufs[i][num_tx_slave],
+ tx_fail_slave * sizeof(bufs[0]));
+ }
+ num_tx_total += num_tx_slave;
+ }
+ }
+
+ return num_tx_total;
+}
+
+/* Transmit burst for active-backup mode: sends the whole burst on the
+ * current primary port using the bonded queue's queue id. Returns 0 without
+ * sending when there is no active slave, otherwise what rte_eth_tx_burst()
+ * returns. */
+static uint16_t
+bond_ethdev_tx_burst_active_backup(void *queue,
+		struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *txq = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *priv = txq->dev_private;
+
+	if (priv->active_slave_count >= 1)
+		return rte_eth_tx_burst(priv->current_primary_port,
+				txq->queue_id, bufs, nb_pkts);
+
+	return 0;
+}
+
+/* XOR-fold the source and destination MAC addresses of eth_hdr, read as
+ * three 16-bit words each, into a single 16-bit value. */
+static inline uint16_t
+ether_hash(struct ether_hdr *eth_hdr)
+{
+	uint16_t *src_words = (uint16_t *)eth_hdr->s_addr.addr_bytes;
+	uint16_t *dst_words = (uint16_t *)eth_hdr->d_addr.addr_bytes;
+	uint16_t folded = 0;
+	int w;
+
+	/* A 6-byte MAC address is three 16-bit words */
+	for (w = 0; w < 3; w++)
+		folded ^= src_words[w] ^ dst_words[w];
+
+	return folded;
+}
+
+/* XOR of the source and destination address fields of ipv4_hdr. */
+static inline uint32_t
+ipv4_hash(struct ipv4_hdr *ipv4_hdr)
+{
+	uint32_t mixed = ipv4_hdr->src_addr;
+
+	mixed ^= ipv4_hdr->dst_addr;
+
+	return mixed;
+}
+
+/*
+ * XOR-fold the IPv6 source and destination addresses, each read as four
+ * 32-bit words, into one 32-bit value.
+ */
+static inline uint32_t
+ipv6_hash(struct ipv6_hdr *ipv6_hdr)
+{
+	uint32_t *src_words = (uint32_t *)&(ipv6_hdr->src_addr[0]);
+	uint32_t *dst_words = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
+	uint32_t folded = 0;
+	unsigned int w;
+
+	for (w = 0; w < 4; w++)
+		folded ^= src_words[w] ^ dst_words[w];
+
+	return folded;
+}
+
+/*
+ * Layer 2 transmit policy: pick a slave index in [0, slave_count) from the
+ * MAC address hash of the packet. slave_count must be non-zero.
+ */
+uint16_t
+xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+{
+	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+	uint32_t hash = ether_hash(eth_hdr);
+
+	/* mix the upper byte into the lower one before reducing */
+	hash ^= hash >> 8;
+
+	return hash % slave_count;
+}
+
+/*
+ * Layer 2+3 transmit policy: combine the MAC address hash with the IPv4 or
+ * IPv6 address hash (when the packet carries one of those protocols) and
+ * reduce the result to a slave index in [0, slave_count).
+ * slave_count must be non-zero.
+ */
+uint16_t
+xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+{
+	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+	uint16_t proto = eth_hdr->ether_type;
+	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
+	/* the L3 header follows the Ethernet header and any VLAN tags */
+	char *l3_start = (char *)(eth_hdr + 1) + vlan_offset;
+	uint32_t hash = ether_hash(eth_hdr);
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+		hash ^= ipv4_hash((struct ipv4_hdr *)l3_start);
+	else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
+		hash ^= ipv6_hash((struct ipv6_hdr *)l3_start);
+
+	hash ^= hash >> 16;
+	hash ^= hash >> 8;
+
+	return hash % slave_count;
+}
+
+/*
+ * Layer 3+4 transmit policy: combine the IPv4/IPv6 address hash with the
+ * TCP or UDP port hash (when present) and reduce the result to a slave index
+ * in [0, slave_count). Packets that are neither IPv4 nor IPv6 hash to 0.
+ * slave_count must be non-zero.
+ */
+uint16_t
+xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+{
+	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+	uint16_t proto = eth_hdr->ether_type;
+	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
+	/* the L3 header follows the Ethernet header and any VLAN tags */
+	char *l3_start = (char *)(eth_hdr + 1) + vlan_offset;
+
+	struct tcp_hdr *tcp = NULL;
+	struct udp_hdr *udp = NULL;
+	uint32_t l3hash = 0, l4hash = 0;
+	uint32_t hash;
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)l3_start;
+		size_t ip4_hdr_len;
+
+		l3hash = ipv4_hash(ip4);
+
+		/* the L4 header starts after the variable-length IPv4 header */
+		ip4_hdr_len = (ip4->version_ihl & IPV4_HDR_IHL_MASK) *
+				IPV4_IHL_MULTIPLIER;
+
+		if (ip4->next_proto_id == IPPROTO_TCP) {
+			tcp = (struct tcp_hdr *)((char *)ip4 + ip4_hdr_len);
+			l4hash = HASH_L4_PORTS(tcp);
+		} else if (ip4->next_proto_id == IPPROTO_UDP) {
+			udp = (struct udp_hdr *)((char *)ip4 + ip4_hdr_len);
+			l4hash = HASH_L4_PORTS(udp);
+		}
+	} else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+		struct ipv6_hdr *ip6 = (struct ipv6_hdr *)l3_start;
+
+		l3hash = ipv6_hash(ip6);
+
+		/* the L4 header is taken to follow the fixed IPv6 header directly */
+		if (ip6->proto == IPPROTO_TCP) {
+			tcp = (struct tcp_hdr *)(ip6 + 1);
+			l4hash = HASH_L4_PORTS(tcp);
+		} else if (ip6->proto == IPPROTO_UDP) {
+			udp = (struct udp_hdr *)(ip6 + 1);
+			l4hash = HASH_L4_PORTS(udp);
+		}
+	}
+
+	hash = l3hash ^ l4hash;
+	hash ^= hash >> 16;
+	hash ^= hash >> 8;
+
+	return hash % slave_count;
+}
+
+struct bwg_slave { /* one slave's remaining-bandwidth record, ordered by bandwidth_cmp() */
+ uint64_t bwg_left_int; /* quotient written by bandwidth_left() */
+ uint64_t bwg_left_remainder; /* remainder written by bandwidth_left() */
+ uint8_t slave; /* slave port id; set by bond_ethdev_update_tlb_slave_cb() */
+};
+
+/*
+ * Reset the recorded transmitted-byte baseline (tlb_last_obytets) of every
+ * currently active slave to zero.
+ */
+void
+bond_tlb_activate_slave(struct bond_dev_private *internals)
+{
+	int idx = 0;
+
+	while (idx < internals->active_slave_count) {
+		tlb_last_obytets[internals->active_slaves[idx]] = 0;
+		idx++;
+	}
+}
+
+/*
+ * qsort() comparator for struct bwg_slave: orders records by descending
+ * bwg_left_int, using descending bwg_left_remainder as the tie-breaker.
+ */
+static int
+bandwidth_cmp(const void *a, const void *b)
+{
+	const struct bwg_slave *lhs = a;
+	const struct bwg_slave *rhs = b;
+	int64_t int_delta = (int64_t)rhs->bwg_left_int -
+			(int64_t)lhs->bwg_left_int;
+	int64_t rem_delta = (int64_t)rhs->bwg_left_remainder -
+			(int64_t)lhs->bwg_left_remainder;
+	/* the remainder only matters when the integer parts are equal */
+	int64_t delta = (int_delta != 0) ? int_delta : rem_delta;
+
+	if (delta > 0)
+		return 1;
+	if (delta < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Compute how much of a slave's link capacity is still unused and store it
+ * in bwg_slave as a quotient (bwg_left_int) and remainder
+ * (bwg_left_remainder) relative to the capacity of the measurement window.
+ *
+ * port_id    - slave port whose link speed is queried
+ * load       - bytes transmitted by the slave in the current window
+ * update_idx - number of completed reorder periods in the current window
+ * bwg_slave  - output record; always fully written
+ *
+ * A link reporting zero speed, or a load at or above the window capacity,
+ * yields zero bandwidth left so that such a slave sorts last.
+ */
+static void
+bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
+		struct bwg_slave *bwg_slave)
+{
+	struct rte_eth_link link_status;
+	uint64_t link_bwg;
+	uint64_t used;
+	uint64_t left;
+
+	rte_eth_link_get(port_id, &link_status);
+	link_bwg = link_status.link_speed * 1000000ULL / 8;
+	if (link_bwg == 0) {
+		/* The caller sorts this record, so never leave it uninitialised */
+		bwg_slave->bwg_left_int = 0;
+		bwg_slave->bwg_left_remainder = 0;
+		return;
+	}
+
+	link_bwg = link_bwg * (update_idx + 1) * REORDER_PERIOD_MS;
+	used = 1000 * load;
+
+	/* Saturate at zero: an unsigned wrap-around would make an overloaded
+	 * link look like it has almost all of its bandwidth left. */
+	left = (used < link_bwg) ? link_bwg - used : 0;
+
+	bwg_slave->bwg_left_int = left / link_bwg;
+	bwg_slave->bwg_left_remainder = left % link_bwg;
+}
+
+/*
+ * Alarm callback that recomputes the TLB slave transmit order.
+ *
+ * For every active slave it reads the port statistics, derives the bytes sent
+ * since the stored baseline, asks bandwidth_left() for the remaining
+ * bandwidth, then sorts the slaves with bandwidth_cmp() and stores the result
+ * in internals->tlb_slaves_order. Once slave_update_idx reaches
+ * REORDER_PERIOD_MS the baselines are refreshed and the index restarts at 0.
+ * The callback re-arms itself before returning.
+ *
+ * arg - the struct bond_dev_private of the bonded device
+ */
+static void
+bond_ethdev_update_tlb_slave_cb(void *arg)
+{
+	struct bond_dev_private *internals = arg;
+	struct bwg_slave ranking[RTE_MAX_ETHPORTS];
+	struct rte_eth_stats stats;
+	uint8_t window_done;
+	uint8_t n_ranked;
+	uint8_t pos;
+
+	internals->slave_update_idx++;
+	window_done = (internals->slave_update_idx >= REORDER_PERIOD_MS) ? 1 : 0;
+
+	for (pos = 0; pos < internals->active_slave_count; pos++) {
+		uint8_t port = internals->active_slaves[pos];
+		uint64_t sent_bytes;
+
+		rte_eth_stats_get(port, &stats);
+		sent_bytes = stats.obytes - tlb_last_obytets[port];
+
+		bandwidth_left(port, sent_bytes, internals->slave_update_idx,
+				&ranking[pos]);
+		ranking[pos].slave = port;
+
+		/* start a new measurement window from the current counter */
+		if (window_done)
+			tlb_last_obytets[port] = stats.obytes;
+	}
+	n_ranked = pos;
+
+	if (window_done)
+		internals->slave_update_idx = 0;
+
+	qsort(ranking, n_ranked, sizeof(ranking[0]), bandwidth_cmp);
+
+	for (pos = 0; pos < n_ranked; pos++)
+		internals->tlb_slaves_order[pos] = ranking[pos].slave;
+
+	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
+			(struct bond_dev_private *)internals);
+}
+
+/*
+ * TLB transmit: offer the burst to the active slaves in tlb_slaves_order,
+ * one after another, until every packet has been accepted or all slaves
+ * have been tried.
+ *
+ * Before each attempt, every still-pending packet whose source MAC equals the
+ * primary port's MAC gets the MAC of the slave about to transmit it.
+ *
+ * queue   - struct bond_tx_queue of the bonded device
+ * bufs    - packets to send; the unsent ones remain at bufs[return value..]
+ * nb_pkts - number of packets in bufs
+ *
+ * Returns the number of packets accepted by the slaves (0 when there is no
+ * active slave).
+ */
+static uint16_t
+bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	struct rte_eth_dev *primary_port =
+			&rte_eth_devices[internals->primary_port];
+	uint16_t num_tx_total = 0;
+	uint8_t i;
+	/* Packet index: must be as wide as nb_pkts. A uint8_t would wrap for
+	 * bursts of more than 255 packets and the inner loop would never end. */
+	uint16_t j;
+
+	uint8_t num_of_slaves = internals->active_slave_count;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+
+	struct ether_hdr *ether_hdr;
+	struct ether_addr primary_slave_addr;
+	struct ether_addr active_slave_addr;
+
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	/* Work on a private copy of the order, the alarm callback rewrites it */
+	memcpy(slaves, internals->tlb_slaves_order,
+			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
+
+	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
+
+	if (nb_pkts > 3) {
+		for (i = 0; i < 3; i++)
+			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
+	}
+
+	for (i = 0; i < num_of_slaves; i++) {
+		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
+
+		/* Re-stamp only the packets that have not been sent yet */
+		for (j = num_tx_total; j < nb_pkts; j++) {
+			if (j + 3 < nb_pkts)
+				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
+
+			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
+				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
+#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
+			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
+#endif
+		}
+
+		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+				bufs + num_tx_total, nb_pkts - num_tx_total);
+
+		if (num_tx_total == nb_pkts)
+			break;
+	}
+
+	return num_tx_total;
+}
+
+/* Cancel the pending TLB slave re-ordering alarm registered for internals. */
+void
+bond_tlb_disable(struct bond_dev_private *internals)
+{
+	void *cb_arg = internals;
+
+	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, cb_arg);
+}
+
+/*
+ * Start TLB slave re-ordering by running the update callback once; the
+ * callback re-arms itself through rte_eal_alarm_set().
+ */
+void
+bond_tlb_enable(struct bond_dev_private *internals)
+{
+	void *cb_arg = internals;
+
+	bond_ethdev_update_tlb_slave_cb(cb_arg);
+}
+
+/*
+ * ALB transmit.
+ *
+ * ARP packets are assigned to a slave by bond_mode_alb_arp_xmit() and sent
+ * with that slave's MAC as source; every other packet is sent with the TLB
+ * policy. When mode6.ntt is set, an ARP update packet is additionally
+ * generated for every in-use client table entry; update packets that cannot
+ * be sent are freed and are never counted in the return value.
+ *
+ * queue   - struct bond_tx_queue of the bonded device
+ * bufs    - packets to send; on return the unsent packets are stored at the
+ *           tail of bufs
+ * nb_pkts - number of packets in bufs
+ *
+ * Returns the number of packets from bufs that were accepted for transmit.
+ */
+static uint16_t
+bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	struct ether_hdr *eth_h;
+	uint16_t ether_type, offset;
+
+	struct client_data *client_info;
+
+	/*
+	 * We create transmit buffers for every slave and one additional to send
+	 * through tlb. In worst case every packet will be send on one port.
+	 */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
+	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
+
+	/*
+	 * We create separate transmit buffers for update packets as they wont be
+	 * counted in num_tx_total.
+	 */
+	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
+	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+	struct rte_mbuf *upd_pkt;
+	size_t pkt_size;
+
+	uint16_t num_send, num_not_send = 0;
+	uint16_t num_tx_total = 0;
+	uint8_t slave_idx;
+
+	int i, j;
+
+	/* Search tx buffer for ARP packets and forward them to alb */
+	for (i = 0; i < nb_pkts; i++) {
+		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
+		ether_type = eth_h->ether_type;
+		offset = get_vlan_offset(eth_h, &ether_type);
+
+		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
+			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
+
+			/* Change src mac in eth header */
+			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
+
+			/* Add packet to slave tx buffer */
+			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
+			slave_bufs_pkts[slave_idx]++;
+		} else {
+			/* If packet is not ARP, send it with TLB policy */
+			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
+					bufs[i];
+			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
+		}
+	}
+
+	/* Update connected client ARP tables */
+	if (internals->mode6.ntt) {
+		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
+			client_info = &internals->mode6.client_table[i];
+
+			if (client_info->in_use) {
+				/* Allocate new packet to send ARP update on current slave */
+				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
+				if (upd_pkt == NULL) {
+					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
+					continue;
+				}
+				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
+						+ client_info->vlan_count * sizeof(struct vlan_hdr);
+				upd_pkt->data_len = pkt_size;
+				upd_pkt->pkt_len = pkt_size;
+
+				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
+						internals);
+
+				/* Add packet to update tx buffer */
+				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
+				update_bufs_pkts[slave_idx]++;
+			}
+		}
+		internals->mode6.ntt = 0;
+	}
+
+	/* Send ARP packets on proper slaves */
+	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+		if (slave_bufs_pkts[i] > 0) {
+			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
+					slave_bufs[i], slave_bufs_pkts[i]);
+
+			/* Move the unsent packets, which sit at the end of this slave's
+			 * buffer, to the tail of bufs. The source index is relative to
+			 * the number of packets queued for this slave, not to nb_pkts:
+			 * entries beyond slave_bufs_pkts[i] were never written. */
+			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
+				bufs[nb_pkts - 1 - num_not_send - j] =
+						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
+			}
+
+			num_tx_total += num_send;
+			num_not_send += slave_bufs_pkts[i] - num_send;
+
+#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
+	/* Print TX stats including update packets */
+			for (j = 0; j < slave_bufs_pkts[i]; j++) {
+				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
+				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
+			}
+#endif
+		}
+	}
+
+	/* Send update packets on proper slaves */
+	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+		if (update_bufs_pkts[i] > 0) {
+			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
+					update_bufs_pkts[i]);
+			/* Update packets are owned by this function: drop the unsent */
+			for (j = num_send; j < update_bufs_pkts[i]; j++) {
+				rte_pktmbuf_free(update_bufs[i][j]);
+			}
+#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
+			/* NOTE(review): this debug loop reads mbufs after they were
+			 * handed to the slave or freed above - confirm it is safe. */
+			for (j = 0; j < update_bufs_pkts[i]; j++) {
+				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
+				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
+			}
+#endif
+		}
+	}
+
+	/* Send non-ARP packets using tlb policy */
+	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
+		num_send = bond_ethdev_tx_burst_tlb(queue,
+				slave_bufs[RTE_MAX_ETHPORTS],
+				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
+
+		/* Only the packets TLB could not send go back to the tail of bufs;
+		 * they are the last entries of the TLB buffer. */
+		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
+			bufs[nb_pkts - 1 - num_not_send - j] =
+					slave_bufs[RTE_MAX_ETHPORTS]
+						[slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
+		}
+
+		num_tx_total += num_send;
+		num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
+	}
+
+	return num_tx_total;
+}
+
+/*
+ * Balance transmit: distribute the burst over the active slaves using the
+ * configured xmit_hash policy and send each slave its share.
+ *
+ * queue   - struct bond_tx_queue of the bonded device
+ * bufs    - packets to send; packets a slave could not send are copied to
+ *           the tail of bufs
+ * nb_pkts - number of packets in bufs
+ *
+ * Returns the number of packets accepted by the slaves (0 when there is no
+ * active slave).
+ */
+static uint16_t
+bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_tx_queue *txq = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *priv = txq->dev_private;
+
+	struct rte_mbuf *per_slave[RTE_MAX_ETHPORTS][nb_pkts];
+	uint16_t per_slave_cnt[RTE_MAX_ETHPORTS] = { 0 };
+
+	uint8_t slave_ports[RTE_MAX_ETHPORTS];
+	uint8_t n_slaves;
+
+	uint16_t sent_total = 0, unsent_total = 0;
+	int pkt, s;
+
+	/* Snapshot the active slave list so that slave up/down changes during
+	 * the burst cannot affect it */
+	n_slaves = priv->active_slave_count;
+	memcpy(slave_ports, priv->active_slaves,
+			sizeof(priv->active_slaves[0]) * n_slaves);
+
+	if (n_slaves < 1)
+		return sent_total;
+
+	/* Sort every packet into the buffer of the slave its hash selects */
+	for (pkt = 0; pkt < nb_pkts; pkt++) {
+		int target = priv->xmit_hash(bufs[pkt], n_slaves);
+
+		per_slave[target][per_slave_cnt[target]] = bufs[pkt];
+		per_slave_cnt[target]++;
+	}
+
+	/* Transmit each non-empty per-slave buffer */
+	for (s = 0; s < n_slaves; s++) {
+		uint16_t sent;
+
+		if (per_slave_cnt[s] == 0)
+			continue;
+
+		sent = rte_eth_tx_burst(slave_ports[s], txq->queue_id,
+				per_slave[s], per_slave_cnt[s]);
+
+		/* Whatever the slave refused is handed back at the end of bufs */
+		if (unlikely(sent < per_slave_cnt[s])) {
+			int unsent = per_slave_cnt[s] - sent;
+
+			unsent_total += unsent;
+			memcpy(&bufs[nb_pkts - unsent_total], &per_slave[s][sent],
+					unsent * sizeof(bufs[0]));
+		}
+
+		sent_total += sent;
+	}
+
+	return sent_total;
+}
+
+/*
+ * 802.3ad transmit.
+ *
+ * For every active slave the pending slow packets are dequeued from the
+ * port's tx_ring and placed at the front of that slave's buffer. The
+ * packets in bufs are then spread, using the xmit_hash policy, over the
+ * slaves whose actor state has DISTRIBUTING set. Slow packets a slave could
+ * not send are freed; unsent packets from bufs are copied to the tail of
+ * bufs.
+ *
+ * Returns the number of packets from bufs that were accepted (slow packets
+ * are not counted), or 0 when there is no active slave.
+ */
+static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_tx_queue *txq = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *priv = txq->dev_private;
+
+	uint8_t slave_ports[RTE_MAX_ETHPORTS];
+	uint8_t n_slaves;
+	/* positions in slave_ports (not port ids) of the distributing slaves */
+	uint8_t dist_pos[RTE_MAX_ETHPORTS];
+	uint8_t n_dist = 0;
+
+	uint16_t sent_total = 0, unsent_total = 0;
+	uint16_t s, k;
+
+	/* Room for the burst plus the slow packets of one slave */
+	const uint16_t buf_len = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
+	struct rte_mbuf *per_slave[RTE_MAX_ETHPORTS][buf_len];
+	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
+
+	/* Packets queued per slave, slow packets included */
+	uint16_t per_slave_cnt[RTE_MAX_ETHPORTS] = { 0 };
+	/* Slow packets queued per slave */
+	uint8_t slow_cnt[RTE_MAX_ETHPORTS] = { 0 };
+
+	/* Snapshot the active slave list so that slave up/down changes during
+	 * the burst cannot affect it */
+	n_slaves = priv->active_slave_count;
+	if (n_slaves < 1)
+		return sent_total;
+
+	memcpy(slave_ports, priv->active_slaves,
+			sizeof(slave_ports[0]) * n_slaves);
+
+	for (s = 0; s < n_slaves; s++) {
+		struct port *port = &mode_8023ad_ports[slave_ports[s]];
+
+		slow_cnt[s] = rte_ring_dequeue_burst(port->tx_ring,
+				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
+		per_slave_cnt[s] = slow_cnt[s];
+
+		for (k = 0; k < slow_cnt[s]; k++)
+			per_slave[s][k] = slow_pkts[k];
+
+		if (ACTOR_STATE(port, DISTRIBUTING)) {
+			dist_pos[n_dist] = s;
+			n_dist++;
+		}
+	}
+
+	if (likely(n_dist > 0)) {
+		/* Append every packet to the distributing slave its hash selects */
+		for (k = 0; k < nb_pkts; k++) {
+			uint16_t pick = priv->xmit_hash(bufs[k], n_dist);
+			uint8_t target = dist_pos[pick];
+
+			per_slave[target][per_slave_cnt[target]] = bufs[k];
+			per_slave_cnt[target]++;
+		}
+	}
+
+	/* Transmit each non-empty per-slave buffer */
+	for (s = 0; s < n_slaves; s++) {
+		uint16_t sent;
+
+		if (per_slave_cnt[s] == 0)
+			continue;
+
+		sent = rte_eth_tx_burst(slave_ports[s], txq->queue_id,
+				per_slave[s], per_slave_cnt[s]);
+
+		/* Slow packets that were not sent are dropped; afterwards 'sent'
+		 * covers all slow packets of this slave */
+		while (sent < slow_cnt[s]) {
+			rte_pktmbuf_free(per_slave[s][sent]);
+			sent++;
+		}
+
+		sent_total += sent - slow_cnt[s];
+		unsent_total += per_slave_cnt[s] - sent;
+
+		/* Unsent packets from the burst are handed back at the end of bufs */
+		if (unlikely(sent < per_slave_cnt[s])) {
+			uint16_t dst = nb_pkts - unsent_total;
+
+			while (sent < per_slave_cnt[s]) {
+				bufs[dst] = per_slave[s][sent];
+				dst++;
+				sent++;
+			}
+		}
+	}
+
+	return sent_total;
+}
+
+/*
+ * Broadcast transmit: send the same burst on every active slave.
+ *
+ * Each mbuf's reference count is raised by (number of slaves - 1) so that
+ * every slave owns one reference. The return value is the packet count of
+ * the most successful slave. For every other slave the references of the
+ * packets it failed to send are released here, because the caller only knows
+ * about a single reference per packet.
+ *
+ * queue   - struct bond_tx_queue of the bonded device
+ * bufs    - packets to send
+ * nb_pkts - number of packets in bufs
+ *
+ * Returns the largest number of packets any slave accepted (0 when there is
+ * no active slave).
+ */
+static uint16_t
+bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_dev_private *internals;
+	struct bond_tx_queue *bd_tx_q;
+
+	uint8_t tx_failed_flag = 0, num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+
+	uint16_t max_nb_of_tx_pkts = 0;
+
+	int slave_tx_total[RTE_MAX_ETHPORTS];
+	/* Default to the first slave. If no slave sends anything, one slave must
+	 * still be exempt from the clean-up below; otherwise the reference the
+	 * caller keeps for the unsent packets would be freed as well. */
+	int i, most_successful_tx_slave = 0;
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	internals = bd_tx_q->dev_private;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = internals->active_slave_count;
+	memcpy(slaves, internals->active_slaves,
+			sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+	if (num_of_slaves < 1)
+		return 0;
+
+	/* Increment reference count on mbufs */
+	for (i = 0; i < nb_pkts; i++)
+		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
+
+	/* Transmit burst on each active slave */
+	for (i = 0; i < num_of_slaves; i++) {
+		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+				bufs, nb_pkts);
+
+		if (unlikely(slave_tx_total[i] < nb_pkts))
+			tx_failed_flag = 1;
+
+		/* record the value and slave index for the slave which transmits the
+		 * maximum number of packets */
+		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
+			max_nb_of_tx_pkts = slave_tx_total[i];
+			most_successful_tx_slave = i;
+		}
+	}
+
+	/* if slaves fail to transmit packets from burst, the calling application
+	 * is not expected to know about multiple references to packets so we must
+	 * handle failures of all packets except those of the most successful slave
+	 */
+	if (unlikely(tx_failed_flag))
+		for (i = 0; i < num_of_slaves; i++)
+			if (i != most_successful_tx_slave)
+				while (slave_tx_total[i] < nb_pkts)
+					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
+
+	return max_nb_of_tx_pkts;
+}
+
+/*
+ * Copy link speed and duplex from a slave link to the bonded device and mark
+ * the bonded link properties as set. Nothing happens unless the slave link is
+ * up and the bonded device has been started.
+ */
+void
+link_properties_set(struct rte_eth_dev *bonded_eth_dev,
+		struct rte_eth_link *slave_dev_link)
+{
+	struct bond_dev_private *priv = bonded_eth_dev->data->dev_private;
+	struct rte_eth_link *bonded_link = &bonded_eth_dev->data->dev_link;
+
+	if (!slave_dev_link->link_status || !bonded_eth_dev->data->dev_started)
+		return;
+
+	bonded_link->link_duplex = slave_dev_link->link_duplex;
+	bonded_link->link_speed = slave_dev_link->link_speed;
+
+	priv->link_props_set = 1;
+}
+
+/*
+ * Zero the bonded device's link structure and clear the flag that records
+ * that its link properties have been set.
+ */
+void
+link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
+{
+	struct rte_eth_link *bonded_link = &bonded_eth_dev->data->dev_link;
+	struct bond_dev_private *priv = bonded_eth_dev->data->dev_private;
+
+	memset(bonded_link, 0, sizeof(*bonded_link));
+
+	priv->link_props_set = 0;
+}
+
+/*
+ * Check that a slave link has the same duplex and speed as the bonded link.
+ * Returns 0 when both match, -1 otherwise.
+ */
+int
+link_properties_valid(struct rte_eth_link *bonded_dev_link,
+		struct rte_eth_link *slave_dev_link)
+{
+	if (bonded_dev_link->link_duplex == slave_dev_link->link_duplex &&
+			bonded_dev_link->link_speed == slave_dev_link->link_speed)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Copy the first MAC address of eth_dev into dst_mac_addr.
+ * Returns 0 on success, -1 (after logging) when either pointer is NULL.
+ */
+int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
+{
+	if (eth_dev == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+		return -1;
+	}
+
+	if (dst_mac_addr == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+		return -1;
+	}
+
+	ether_addr_copy(eth_dev->data->mac_addrs, dst_mac_addr);
+
+	return 0;
+}
+
+/*
+ * Store new_mac_addr as the first MAC address of eth_dev; the stored value
+ * is only written when it differs from the new one.
+ * Returns 0 on success, -1 (after logging) when either pointer is NULL.
+ */
+int
+mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
+{
+	struct ether_addr *current;
+
+	if (eth_dev == NULL) {
+		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
+		return -1;
+	}
+
+	if (new_mac_addr == NULL) {
+		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
+		return -1;
+	}
+
+	current = eth_dev->data->mac_addrs;
+
+	/* Nothing to do when the address is already in place */
+	if (memcmp(current, new_mac_addr, sizeof(*current)) == 0)
+		return 0;
+
+	memcpy(current, new_mac_addr, sizeof(*current));
+
+	return 0;
+}
+
+/*
+ * Propagate MAC addresses to the slaves of a bonded device according to the
+ * bonding mode:
+ *  - round robin, balance, broadcast: every slave gets the bonded MAC;
+ *  - 802.3ad: delegated to bond_mode_8023ad_mac_address_update();
+ *  - all other modes: the slave that is the current primary port gets the
+ *    bonded MAC, every other slave gets back its persisted MAC.
+ *
+ * Returns 0 on success, -1 when the device has no slaves or a MAC could not
+ * be set.
+ */
+int
+mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
+{
+	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
+	int i;
+
+	/* Update slave devices MAC addresses */
+	if (internals->slave_count < 1)
+		return -1;
+
+	switch (internals->mode) {
+	case BONDING_MODE_ROUND_ROBIN:
+	case BONDING_MODE_BALANCE:
+	case BONDING_MODE_BROADCAST:
+		for (i = 0; i < internals->slave_count; i++) {
+			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
+					bonded_eth_dev->data->mac_addrs)) {
+				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
+						internals->slaves[i].port_id);
+				return -1;
+			}
+		}
+		break;
+	case BONDING_MODE_8023AD:
+		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
+		break;
+	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_TLB:
+	case BONDING_MODE_ALB:
+	default:
+		for (i = 0; i < internals->slave_count; i++) {
+			if (internals->slaves[i].port_id ==
+					internals->current_primary_port) {
+				/* Write to the slave that was just matched, i.e. the
+				 * current primary. primary_port may name a different
+				 * device once the primary has changed. */
+				if (mac_address_set(
+						&rte_eth_devices[internals->slaves[i].port_id],
+						bonded_eth_dev->data->mac_addrs)) {
+					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
+							internals->current_primary_port);
+					return -1;
+				}
+			} else {
+				if (mac_address_set(
+						&rte_eth_devices[internals->slaves[i].port_id],
+						&internals->slaves[i].persisted_mac_addr)) {
+					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
+							internals->slaves[i].port_id);
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Select the bonding mode of eth_dev by installing the matching rx/tx burst
+ * functions and recording the mode in the private data.
+ *
+ * 802.3ad and ALB first run their enable routine; if that fails nothing is
+ * changed. Returns 0 on success, -1 for an unknown mode or a failed enable.
+ */
+int
+bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+
+	switch (mode) {
+	case BONDING_MODE_ROUND_ROBIN:
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
+		break;
+
+	case BONDING_MODE_BALANCE:
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
+		break;
+
+	case BONDING_MODE_BROADCAST:
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
+		break;
+
+	case BONDING_MODE_ACTIVE_BACKUP:
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
+		break;
+
+	case BONDING_MODE_TLB:
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
+		break;
+
+	case BONDING_MODE_8023AD:
+		if (bond_mode_8023ad_enable(eth_dev) != 0)
+			return -1;
+
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+		RTE_LOG(WARNING, PMD,
+				"Using mode 4, it is necessary to do TX burst and RX burst "
+				"at least every 100ms.\n");
+		break;
+
+	case BONDING_MODE_ALB:
+		if (bond_mode_alb_enable(eth_dev) != 0)
+			return -1;
+
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
+		break;
+
+	default:
+		return -1;
+	}
+
+	priv->mode = mode;
+
+	return 0;
+}
+
+/*
+ * Reconfigure and restart a slave so that it mirrors the bonded device:
+ * the slave is stopped, configured with the bonded device's rx/tx queue
+ * counts, given one rx and one tx queue per bonded queue (same descriptor
+ * counts and queue configuration) and started again. Link status change
+ * interrupts are requested when the slave's driver advertises them.
+ *
+ * Returns 0 on success; the failing call's error value when configuring the
+ * device or a queue fails; -1 when the slave cannot be started.
+ */
+int
+slave_configure(struct rte_eth_dev *bonded_eth_dev,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	struct bond_rx_queue *rxq;
+	struct bond_tx_queue *txq;
+	uint16_t q;
+	int rc;
+
+	/* The slave must be stopped before it can be reconfigured */
+	rte_eth_dev_stop(slave_eth_dev->data->port_id);
+
+	/* Ask for link status interrupts when the driver supports them */
+	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
+		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
+
+	/* Same number of queues as the bonded device */
+	rc = rte_eth_dev_configure(slave_eth_dev->data->port_id,
+			bonded_eth_dev->data->nb_rx_queues,
+			bonded_eth_dev->data->nb_tx_queues,
+			&(slave_eth_dev->data->dev_conf));
+	if (rc != 0) {
+		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
+				slave_eth_dev->data->port_id, rc);
+		return rc;
+	}
+
+	/* One rx queue per bonded rx queue */
+	q = 0;
+	while (q < bonded_eth_dev->data->nb_rx_queues) {
+		rxq = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q];
+
+		rc = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q,
+				rxq->nb_rx_desc,
+				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
+				&(rxq->rx_conf), rxq->mb_pool);
+		if (rc != 0) {
+			RTE_BOND_LOG(ERR,
+					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
+					slave_eth_dev->data->port_id, q, rc);
+			return rc;
+		}
+		q++;
+	}
+
+	/* One tx queue per bonded tx queue */
+	q = 0;
+	while (q < bonded_eth_dev->data->nb_tx_queues) {
+		txq = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q];
+
+		rc = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q,
+				txq->nb_tx_desc,
+				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
+				&txq->tx_conf);
+		if (rc != 0) {
+			RTE_BOND_LOG(ERR,
+					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
+					slave_eth_dev->data->port_id, q, rc);
+			return rc;
+		}
+		q++;
+	}
+
+	/* Bring the slave back up */
+	rc = rte_eth_dev_start(slave_eth_dev->data->port_id);
+	if (rc != 0) {
+		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
+				slave_eth_dev->data->port_id, rc);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the entry for slave_eth_dev from internals->slaves by shifting the
+ * following entries down, then decrement slave_count.
+ * The count is decremented even when no entry matches the port id.
+ */
+void
+slave_remove(struct bond_dev_private *internals,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	uint8_t pos = 0;
+
+	/* Locate the slave; pos ends at slave_count when it is not present */
+	while (pos < internals->slave_count &&
+			internals->slaves[pos].port_id !=
+				slave_eth_dev->data->port_id)
+		pos++;
+
+	/* Close the gap unless the slave was the last entry */
+	if (pos < (internals->slave_count - 1))
+		memmove(&internals->slaves[pos], &internals->slaves[pos + 1],
+				sizeof(internals->slaves[0]) *
+				(internals->slave_count - pos - 1));
+
+	internals->slave_count--;
+}
+
+static void
+bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
+
+/*
+ * Fill in the slave details entry at index slave_count for slave_eth_dev:
+ * port id, cleared link state and the slave's current MAC address (kept as
+ * persisted_mac_addr). slave_count itself is not modified here.
+ *
+ * When the slave's driver does not advertise link status interrupts, the
+ * slave is flagged for polling and, if polling is not yet running for the
+ * bonded device, the link status monitor alarm is armed.
+ */
+void
+slave_add(struct bond_dev_private *internals,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	struct bond_slave_details *entry =
+			&internals->slaves[internals->slave_count];
+	int has_lsc_intr =
+		(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+
+	entry->port_id = slave_eth_dev->data->port_id;
+	entry->last_link_status = 0;
+
+	/* Without link status interrupts the link has to be polled */
+	if (!has_lsc_intr) {
+		entry->link_status_poll_enabled = 1;
+
+		/* Arm the monitor only once per bonded device */
+		if (!internals->link_status_polling_enabled) {
+			internals->link_status_polling_enabled = 1;
+
+			rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
+					bond_ethdev_slave_link_status_change_monitor,
+					(void *)&rte_eth_devices[internals->port_id]);
+		}
+	}
+
+	entry->link_status_wait_to_complete = 0;
+
+	/* Remember the slave's own MAC */
+	memcpy(&(entry->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
+			sizeof(struct ether_addr));
+}
+
+/*
+ * Make slave_port_id the current primary port. When the device has active
+ * slaves the change is only applied if slave_port_id is one of them; with no
+ * active slaves it is applied unconditionally.
+ */
+void
+bond_ethdev_primary_set(struct bond_dev_private *internals,
+		uint8_t slave_port_id)
+{
+	int idx;
+
+	if (internals->active_slave_count < 1) {
+		internals->current_primary_port = slave_port_id;
+		return;
+	}
+
+	/* Accept the proposed primary only if it is an active slave */
+	for (idx = 0; idx < internals->active_slave_count; idx++) {
+		if (internals->active_slaves[idx] != slave_port_id)
+			continue;
+
+		internals->current_primary_port = slave_port_id;
+	}
+}
+
+static void
+bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
+
+/*
+ * Start callback of the bonded device.
+ *
+ * Marks the device as started with link down, derives the bonded MAC from the
+ * primary slave unless the user defined one, pushes MAC addresses to the
+ * slaves, re-applies promiscuous mode if enabled, reconfigures every slave
+ * and finally starts the mode specific machinery (802.3ad, TLB/ALB).
+ *
+ * Returns 0 on success, -1 on any failure. On failure, state that was
+ * already changed (for example dev_started) is not rolled back.
+ */
+static int
+bond_ethdev_start(struct rte_eth_dev *eth_dev)
+{
+	struct bond_dev_private *priv;
+	int idx;
+
+	/* slave eth dev will be started by bonded device */
+	if (valid_bonded_ethdev(eth_dev)) {
+		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
+				eth_dev->data->port_id);
+		return -1;
+	}
+
+	eth_dev->data->dev_started = 1;
+	eth_dev->data->dev_link.link_status = 0;
+
+	priv = eth_dev->data->dev_private;
+
+	if (priv->slave_count == 0) {
+		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
+		return -1;
+	}
+
+	/* No user supplied MAC: inherit the primary slave's persisted one */
+	if (priv->user_defined_mac == 0) {
+		struct ether_addr *primary_mac = NULL;
+
+		for (idx = 0; idx < priv->slave_count; idx++) {
+			if (priv->slaves[idx].port_id != priv->primary_port)
+				continue;
+
+			primary_mac = &priv->slaves[idx].persisted_mac_addr;
+		}
+
+		if (primary_mac == NULL)
+			return -1;
+
+		if (mac_address_set(eth_dev, primary_mac) != 0) {
+			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
+					eth_dev->data->port_id);
+			return -1;
+		}
+	}
+
+	/* Push the MAC addresses down to the slaves */
+	if (mac_address_slaves_update(eth_dev) != 0)
+		return -1;
+
+	/* Re-apply promiscuous mode if it was configured on the bonded device */
+	if (priv->promiscuous_en)
+		bond_ethdev_promiscuous_enable(eth_dev);
+
+	/* Every slave is reconfigured when the bonded device starts */
+	for (idx = 0; idx < priv->slave_count; idx++) {
+		struct rte_eth_dev *slave_dev =
+				&(rte_eth_devices[priv->slaves[idx].port_id]);
+
+		if (slave_configure(eth_dev, slave_dev) != 0) {
+			RTE_BOND_LOG(ERR,
+					"bonded port (%d) failed to reconfigure slave device (%d)",
+					eth_dev->data->port_id, priv->slaves[idx].port_id);
+			return -1;
+		}
+	}
+
+	if (priv->user_defined_primary_port)
+		bond_ethdev_primary_set(priv, priv->primary_port);
+
+	if (priv->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_start(eth_dev);
+
+	if (priv->mode == BONDING_MODE_TLB || priv->mode == BONDING_MODE_ALB)
+		bond_tlb_enable(priv);
+
+	return 0;
+}
+
+/*
+ * Stop callback of the bonded device.
+ *
+ * In 802.3ad mode the mode 4 machinery is stopped and every slave's rx and
+ * tx rings are drained, freeing the queued packets. In TLB/ALB mode the
+ * re-ordering alarm is cancelled and the byte baselines of the active slaves
+ * are cleared. Finally the active slave list is emptied, link status polling
+ * is switched off and the device is marked stopped with link down.
+ */
+static void
+bond_ethdev_stop(struct rte_eth_dev *eth_dev)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+	uint8_t idx;
+
+	if (priv->mode == BONDING_MODE_8023AD) {
+		struct port *port;
+		void *pkt = NULL;
+
+		bond_mode_8023ad_stop(eth_dev);
+
+		/* Discard all messages to/from mode 4 state machines */
+		for (idx = 0; idx < priv->slave_count; idx++) {
+			port = &mode_8023ad_ports[priv->slaves[idx].port_id];
+
+			RTE_VERIFY(port->rx_ring != NULL);
+			for (;;) {
+				if (rte_ring_dequeue(port->rx_ring, &pkt) == -ENOENT)
+					break;
+				rte_pktmbuf_free(pkt);
+			}
+
+			RTE_VERIFY(port->tx_ring != NULL);
+			for (;;) {
+				if (rte_ring_dequeue(port->tx_ring, &pkt) == -ENOENT)
+					break;
+				rte_pktmbuf_free(pkt);
+			}
+		}
+	}
+
+	if (priv->mode == BONDING_MODE_TLB || priv->mode == BONDING_MODE_ALB) {
+		bond_tlb_disable(priv);
+
+		for (idx = 0; idx < priv->active_slave_count; idx++)
+			tlb_last_obytets[priv->active_slaves[idx]] = 0;
+	}
+
+	priv->active_slave_count = 0;
+	priv->link_status_polling_enabled = 0;
+
+	eth_dev->data->dev_started = 0;
+	eth_dev->data->dev_link.link_status = 0;
+}
+
+/* Close callback of the bonded device; it currently performs no work. */
+static void
+bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
+{
+	/* nothing to do */
+}
+
+/* forward declaration */
+static int bond_ethdev_configure(struct rte_eth_dev *dev);
+
+/*
+ * Device info callback: report the fixed limits of the bonded device
+ * together with the rx/tx offload capabilities stored in its private data.
+ */
+static void
+bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct bond_dev_private *priv = dev->data->dev_private;
+
+	/* identification */
+	dev_info->driver_name = driver_name;
+	dev_info->pci_dev = dev->pci_dev;
+
+	/* fixed limits */
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)2048;
+	dev_info->max_rx_queues = (uint16_t)128;
+	dev_info->max_tx_queues = (uint16_t)512;
+	dev_info->min_rx_bufsize = 0;
+
+	/* offloads recorded in the private data */
+	dev_info->rx_offload_capa = priv->rx_offload_capa;
+	dev_info->tx_offload_capa = priv->tx_offload_capa;
+}
+
+/*
+ * Rx queue setup callback: allocate a zeroed bond_rx_queue on the device's
+ * NUMA node, record the queue parameters (id, descriptor count, rx
+ * configuration, mempool) and publish it in dev->data->rx_queues.
+ * The socket_id argument is ignored.
+ *
+ * Returns 0 on success, -1 when the allocation fails.
+ */
+static int
+bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
+		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
+{
+	struct bond_rx_queue *rxq;
+
+	rxq = (struct bond_rx_queue *)rte_zmalloc_socket(NULL,
+			sizeof(struct bond_rx_queue), 0, dev->pci_dev->numa_node);
+	if (rxq == NULL)
+		return -1;
+
+	rxq->dev_private = dev->data->dev_private;
+	rxq->queue_id = rx_queue_id;
+	rxq->nb_rx_desc = nb_rx_desc;
+	rxq->mb_pool = mb_pool;
+	memcpy(&(rxq->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
+
+	dev->data->rx_queues[rx_queue_id] = rxq;
+
+	return 0;
+}
+
+/* tx_queue_setup callback: record the requested TX queue parameters in a
+ * newly allocated bond_tx_queue and store it in
+ * dev->data->tx_queues[tx_queue_id].
+ *
+ * The context is allocated on the NUMA node of the bonded device
+ * (dev->pci_dev->numa_node); the socket_id argument is not used. A context
+ * left in the slot by an earlier setup of the same queue is freed so that
+ * setting a queue up more than once does not leak memory.
+ *
+ * Returns 0 on success, -1 if the queue context cannot be allocated (the
+ * previous context, if any, is left untouched in that case). */
+static int
+bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
+		const struct rte_eth_txconf *tx_conf)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
+			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
+					0, dev->pci_dev->numa_node);
+
+	if (bd_tx_q == NULL)
+		return -1;
+
+	bd_tx_q->queue_id = tx_queue_id;
+	bd_tx_q->dev_private = dev->data->dev_private;
+
+	bd_tx_q->nb_tx_desc = nb_tx_desc;
+	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
+
+	/* Drop the context of a previous setup of this queue, if any.
+	 * rte_free(NULL) is a no-op.
+	 * NOTE(review): relies on unused tx_queues[] slots being NULL - confirm
+	 * against the ethdev layer that allocates the array. */
+	rte_free(dev->data->tx_queues[tx_queue_id]);
+	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
+
+	return 0;
+}
+
+/* rx_queue_release callback: free an RX queue context; a NULL queue is
+ * ignored. */
+static void
+bond_ethdev_rx_queue_release(void *queue)
+{
+	if (queue != NULL)
+		rte_free(queue);
+}
+
+/* tx_queue_release callback: free a TX queue context; a NULL queue is
+ * ignored. */
+static void
+bond_ethdev_tx_queue_release(void *queue)
+{
+	if (queue != NULL)
+		rte_free(queue);
+}
+
+/* Alarm callback that polls the link status of every slave which has
+ * link_status_poll_enabled set and hands any change to
+ * bond_ethdev_lsc_event_callback().
+ *
+ * cb_arg is the bonded struct rte_eth_dev. The callback returns without
+ * re-arming itself when cb_arg is NULL, when the bonded device is not
+ * started or when link status polling is disabled on it. Otherwise it
+ * re-arms itself after link_status_polling_interval_ms, except when the
+ * lock was taken, the device has at least one slave and none of the slaves
+ * has polling enabled. */
+static void
+bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
+{
+ struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
+ struct bond_dev_private *internals;
+
+ /* Default value for polling slave found is true as we don't want to
+ * disable the polling thread if we cannot get the lock */
+ int i, polling_slave_found = 1;
+
+ if (cb_arg == NULL)
+ return;
+
+ bonded_ethdev = (struct rte_eth_dev *)cb_arg;
+ internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
+
+ if (!bonded_ethdev->data->dev_started ||
+ !internals->link_status_polling_enabled)
+ return;
+
+ /* If device is currently being configured then don't check slaves link
+ * status, wait until next period */
+ if (rte_spinlock_trylock(&internals->lock)) {
+ /* With slaves present, only keep polling if one of them asks for it */
+ if (internals->slave_count > 0)
+ polling_slave_found = 0;
+
+ for (i = 0; i < internals->slave_count; i++) {
+ if (!internals->slaves[i].link_status_poll_enabled)
+ continue;
+
+ slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
+ polling_slave_found = 1;
+
+ /* Update slave link status */
+ (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
+ internals->slaves[i].link_status_wait_to_complete);
+
+ /* if link status has changed since last checked then call lsc
+ * event callback */
+ if (slave_ethdev->data->dev_link.link_status !=
+ internals->slaves[i].last_link_status) {
+ internals->slaves[i].last_link_status =
+ slave_ethdev->data->dev_link.link_status;
+
+ /* The handler receives a pointer to the bonded port id as param */
+ bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
+ RTE_ETH_EVENT_INTR_LSC,
+ &bonded_ethdev->data->port_id);
+ }
+ }
+ rte_spinlock_unlock(&internals->lock);
+ }
+
+ if (polling_slave_found)
+ /* Set alarm to continue monitoring link status of slave ethdev's */
+ rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
+ bond_ethdev_slave_link_status_change_monitor, cb_arg);
+}
+
+/* link_update callback: the bonded link is reported up when the device is
+ * started and at least one active slave reports link up after its own
+ * link_update has been invoked; otherwise it is reported down. Slaves are
+ * queried in active list order and the scan stops at the first one that is
+ * up. Always returns 0. */
+static int
+bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
+		int wait_to_complete)
+{
+	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
+	struct rte_eth_dev *slave;
+	int idx, any_up = 0;
+
+	if (bonded_eth_dev->data->dev_started) {
+		for (idx = 0; idx < internals->active_slave_count && !any_up;
+				idx++) {
+			slave = &rte_eth_devices[internals->active_slaves[idx]];
+
+			/* Refresh the slave's link state before reading it */
+			(*slave->dev_ops->link_update)(slave, wait_to_complete);
+			if (slave->data->dev_link.link_status == 1)
+				any_up = 1;
+		}
+	}
+
+	bonded_eth_dev->data->dev_link.link_status = any_up;
+
+	return 0;
+}
+
+/* stats_get callback: zero *stats, then add up the counters reported by
+ * rte_eth_stats_get() for every slave of the bonded device. */
+static void
+bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_eth_stats per_slave;
+	int idx;
+
+	/* Start from a clean slate before accumulating slave counters */
+	memset(stats, 0, sizeof(*stats));
+
+	for (idx = 0; idx < internals->slave_count; idx++) {
+		rte_eth_stats_get(internals->slaves[idx].port_id, &per_slave);
+
+		/* Receive side */
+		stats->ipackets += per_slave.ipackets;
+		stats->ibytes += per_slave.ibytes;
+		stats->ierrors += per_slave.ierrors;
+		stats->imcasts += per_slave.imcasts;
+		stats->rx_nombuf += per_slave.rx_nombuf;
+
+		/* Transmit side */
+		stats->opackets += per_slave.opackets;
+		stats->obytes += per_slave.obytes;
+		stats->oerrors += per_slave.oerrors;
+
+		/* Flow director */
+		stats->fdirmatch += per_slave.fdirmatch;
+		stats->fdirmiss += per_slave.fdirmiss;
+
+		/* Pause frames */
+		stats->tx_pause_xon += per_slave.tx_pause_xon;
+		stats->rx_pause_xon += per_slave.rx_pause_xon;
+		stats->tx_pause_xoff += per_slave.tx_pause_xoff;
+		stats->rx_pause_xoff += per_slave.rx_pause_xoff;
+	}
+}
+
+/* stats_reset callback: reset the statistics of every slave of the bonded
+ * device. */
+static void
+bond_ethdev_stats_reset(struct rte_eth_dev *dev)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	const struct bond_slave_details *slave = internals->slaves;
+	int idx;
+
+	for (idx = 0; idx < internals->slave_count; idx++, slave++)
+		rte_eth_stats_reset(slave->port_id);
+}
+
+/* promiscuous_enable callback: remember that promiscuous mode is enabled on
+ * the bonded device and propagate it to slaves according to the bonding
+ * mode. */
+static void
+bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
+{
+	struct bond_dev_private *internals = eth_dev->data->dev_private;
+	int idx;
+
+	internals->promiscuous_en = 1;
+
+	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
+	if (internals->mode == BONDING_MODE_8023AD)
+		return;
+
+	/* Round robin, balance and broadcast: propagate to all slaves */
+	if (internals->mode == BONDING_MODE_ROUND_ROBIN ||
+			internals->mode == BONDING_MODE_BALANCE ||
+			internals->mode == BONDING_MODE_BROADCAST) {
+		for (idx = 0; idx < internals->slave_count; idx++)
+			rte_eth_promiscuous_enable(internals->slaves[idx].port_id);
+		return;
+	}
+
+	/* Active backup, TLB, ALB and any other mode: primary slave only */
+	rte_eth_promiscuous_enable(internals->current_primary_port);
+}
+
+/* promiscuous_disable callback: remember that promiscuous mode is disabled
+ * on the bonded device and propagate it to slaves according to the bonding
+ * mode. */
+static void
+bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	int idx;
+
+	internals->promiscuous_en = 0;
+
+	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
+	if (internals->mode == BONDING_MODE_8023AD)
+		return;
+
+	/* Round robin, balance and broadcast: propagate to all slaves */
+	if (internals->mode == BONDING_MODE_ROUND_ROBIN ||
+			internals->mode == BONDING_MODE_BALANCE ||
+			internals->mode == BONDING_MODE_BROADCAST) {
+		for (idx = 0; idx < internals->slave_count; idx++)
+			rte_eth_promiscuous_disable(internals->slaves[idx].port_id);
+		return;
+	}
+
+	/* Active backup, TLB, ALB and any other mode: primary slave only */
+	rte_eth_promiscuous_disable(internals->current_primary_port);
+}
+
+/* Alarm callback used when link up/down propagation delays are configured:
+ * raise the LSC event on the bonded device passed as arg. A NULL arg is
+ * ignored. */
+static void
+bond_ethdev_delayed_lsc_propagation(void *arg)
+{
+	struct rte_eth_dev *bonded_eth_dev = (struct rte_eth_dev *)arg;
+
+	if (bonded_eth_dev != NULL)
+		_rte_eth_dev_callback_process(bonded_eth_dev,
+				RTE_ETH_EVENT_INTR_LSC);
+}
+
+/* Handle a link status change of slave port_id on behalf of a bonded device.
+ *
+ * port_id is the slave whose link changed, type must be
+ * RTE_ETH_EVENT_INTR_LSC and param must point to the uint8_t port id of the
+ * bonded device. The event is ignored when type/param are not as expected,
+ * when valid_bonded_ethdev() returns non-zero for the bonded device, when the
+ * bonded device is not started or when port_id is not one of its slaves.
+ *
+ * Link up: a slave that is not yet active is activated. If it is the first
+ * active slave the bonded link is marked up, the slave becomes the current
+ * primary, slave MAC addresses are updated and the bonded device inherits
+ * the slave's link properties.
+ * Link down: an active slave is deactivated. If no active slave remains the
+ * bonded link is marked down and its link properties are reset. If the slave
+ * was the current primary a new one is selected.
+ *
+ * When the bonded link status changed, the LSC event is raised on the bonded
+ * device, either immediately or through an alarm when the corresponding
+ * link up/down delay is non-zero. */
+void
+bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
+ void *param)
+{
+ struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
+ struct bond_dev_private *internals;
+ struct rte_eth_link link;
+
+ int i, valid_slave = 0;
+ uint8_t active_pos;
+ uint8_t lsc_flag = 0;
+
+ if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
+ return;
+
+ bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
+ slave_eth_dev = &rte_eth_devices[port_id];
+
+ /* A non-zero result means the device failed validation */
+ if (valid_bonded_ethdev(bonded_eth_dev))
+ return;
+
+ internals = bonded_eth_dev->data->dev_private;
+
+ /* If the device isn't started don't handle interrupts */
+ if (!bonded_eth_dev->data->dev_started)
+ return;
+
+ /* verify that port_id is a valid slave of bonded port */
+ for (i = 0; i < internals->slave_count; i++) {
+ if (internals->slaves[i].port_id == port_id) {
+ valid_slave = 1;
+ break;
+ }
+ }
+
+ if (!valid_slave)
+ return;
+
+ /* Search for port in active port list */
+ active_pos = find_slave_by_id(internals->active_slaves,
+ internals->active_slave_count, port_id);
+
+ rte_eth_link_get_nowait(port_id, &link);
+ if (link.link_status) {
+ /* Link is up and the slave is already active: nothing to do */
+ if (active_pos < internals->active_slave_count)
+ return;
+
+ /* if no active slave ports then set this port to be primary port */
+ if (internals->active_slave_count < 1) {
+ /* If first active slave, then change link status */
+ bonded_eth_dev->data->dev_link.link_status = 1;
+ internals->current_primary_port = port_id;
+ lsc_flag = 1;
+
+ mac_address_slaves_update(bonded_eth_dev);
+
+ /* Inherit eth dev link properties from first active slave */
+ link_properties_set(bonded_eth_dev,
+ &(slave_eth_dev->data->dev_link));
+ }
+
+ activate_slave(bonded_eth_dev, port_id);
+
+ /* If user has defined the primary port then default to using it */
+ if (internals->user_defined_primary_port &&
+ internals->primary_port == port_id)
+ bond_ethdev_primary_set(internals, port_id);
+ } else {
+ /* Link is down and the slave is not active: nothing to do */
+ if (active_pos == internals->active_slave_count)
+ return;
+
+ /* Remove from active slave list */
+ deactivate_slave(bonded_eth_dev, port_id);
+
+ /* No active slaves, change link status to down and reset other
+ * link properties */
+ if (internals->active_slave_count < 1) {
+ lsc_flag = 1;
+ bonded_eth_dev->data->dev_link.link_status = 0;
+
+ link_properties_reset(bonded_eth_dev);
+ }
+
+ /* Update primary id, take first active slave from list or if none
+ * available fall back to the configured primary port */
+ if (port_id == internals->current_primary_port) {
+ if (internals->active_slave_count > 0)
+ bond_ethdev_primary_set(internals,
+ internals->active_slaves[0]);
+ else
+ internals->current_primary_port = internals->primary_port;
+ }
+ }
+
+ if (lsc_flag) {
+ /* Cancel any possible outstanding interrupts if delays are enabled */
+ if (internals->link_up_delay_ms > 0 ||
+ internals->link_down_delay_ms > 0)
+ rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
+ bonded_eth_dev);
+
+ /* Propagate now, or after the configured delay (ms converted to us) */
+ if (bonded_eth_dev->data->dev_link.link_status) {
+ if (internals->link_up_delay_ms > 0)
+ rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
+ bond_ethdev_delayed_lsc_propagation,
+ (void *)bonded_eth_dev);
+ else
+ _rte_eth_dev_callback_process(bonded_eth_dev,
+ RTE_ETH_EVENT_INTR_LSC);
+
+ } else {
+ if (internals->link_down_delay_ms > 0)
+ rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
+ bond_ethdev_delayed_lsc_propagation,
+ (void *)bonded_eth_dev);
+ else
+ _rte_eth_dev_callback_process(bonded_eth_dev,
+ RTE_ETH_EVENT_INTR_LSC);
+ }
+ }
+}
+
+/* Ethdev operations implemented by the bonding PMD. Callbacks not listed
+ * here are left unset by this initializer. */
+struct eth_dev_ops default_dev_ops = {
+ .dev_start = bond_ethdev_start,
+ .dev_stop = bond_ethdev_stop,
+ .dev_close = bond_ethdev_close,
+ .dev_configure = bond_ethdev_configure,
+ .dev_infos_get = bond_ethdev_info,
+ .rx_queue_setup = bond_ethdev_rx_queue_setup,
+ .tx_queue_setup = bond_ethdev_tx_queue_setup,
+ .rx_queue_release = bond_ethdev_rx_queue_release,
+ .tx_queue_release = bond_ethdev_tx_queue_release,
+ .link_update = bond_ethdev_link_update,
+ .stats_get = bond_ethdev_stats_get,
+ .stats_reset = bond_ethdev_stats_reset,
+ .promiscuous_enable = bond_ethdev_promiscuous_enable,
+ .promiscuous_disable = bond_ethdev_promiscuous_disable
+};
+
+/* Virtual device init entry point (see bond_drv): create a bonded ethdev
+ * called name from the key/value argument string params.
+ *
+ * Only the arguments needed to create the device are handled here: "mode"
+ * must be given exactly once and "socket_id" at most once (rte_socket_id()
+ * is used when it is absent). On success the parsed argument list is kept in
+ * the device private data so that bond_ethdev_configure() can process the
+ * remaining arguments later.
+ *
+ * Returns 0 on success, -1 if the arguments cannot be parsed or the device
+ * cannot be created; the parsed argument list is freed on failure. */
+static int
+bond_init(const char *name, const char *params)
+{
+	struct bond_dev_private *internals;
+	struct rte_kvargs *kvlist;
+	uint8_t bonding_mode, socket_id;
+	int arg_count, port_id;
+
+	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
+
+	kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
+	if (kvlist == NULL)
+		return -1;
+
+	/* Parse link bonding mode */
+	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
+		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
+				&bond_ethdev_parse_slave_mode_kvarg,
+				&bonding_mode) != 0) {
+			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
+					name);
+			goto parse_error;
+		}
+	} else {
+		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
+				"device %s\n", name);
+		goto parse_error;
+	}
+
+	/* Parse socket id to create bonding device on */
+	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
+	if (arg_count == 1) {
+		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
+				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
+				!= 0) {
+			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
+					"bonded device %s\n", name);
+			goto parse_error;
+		}
+	} else if (arg_count > 1) {
+		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
+				"bonded device %s\n", name);
+		goto parse_error;
+	} else {
+		socket_id = rte_socket_id();
+	}
+
+	/* Create link bonding eth device */
+	port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
+	if (port_id < 0) {
+		/* %s is the device name, so the message must not call it a socket */
+		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
+				"socket %u.\n", name, bonding_mode, socket_id);
+		goto parse_error;
+	}
+
+	/* Hand the argument list over to the device for the configure step */
+	internals = rte_eth_devices[port_id].data->dev_private;
+	internals->kvlist = kvlist;
+
+	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
+			"socket %u.\n", name, port_id, bonding_mode, socket_id);
+	return 0;
+
+parse_error:
+	rte_kvargs_free(kvlist);
+
+	return -1;
+}
+
+/* dev_configure callback. This part will resolve the slave portids after all
+ * the other pdev and vdev have been allocated.
+ *
+ * When the device was created from a vdev argument string, the argument list
+ * stored by bond_init() is processed here in this order: MAC address,
+ * balance transmit policy, slave ports, primary slave, link status polling
+ * interval, link up delay and link down delay. Every argument except the
+ * slave list is optional and may be given at most once; at least one slave
+ * argument is required. A slave that cannot be added is logged and skipped
+ * without failing the call.
+ *
+ * Returns 0 on success (or when there is no argument list), -1 on the first
+ * invalid, duplicated or unappliable argument. */
+static int
+bond_ethdev_configure(struct rte_eth_dev *dev)
+{
+ char *name = dev->data->name;
+ struct bond_dev_private *internals = dev->data->dev_private;
+ struct rte_kvargs *kvlist = internals->kvlist;
+ int arg_count;
+ uint8_t port_id = dev - rte_eth_devices;
+
+ /*
+ * if no kvlist, it means that this bonded device has been created
+ * through the bonding api.
+ */
+ if (!kvlist)
+ return 0;
+
+ /* Parse MAC address for bonded device */
+ arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
+ if (arg_count == 1) {
+ struct ether_addr bond_mac;
+
+ if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
+ &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
+ RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
+ name);
+ return -1;
+ }
+
+ /* Set MAC address */
+ if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to set mac address on bonded device %s\n",
+ name);
+ return -1;
+ }
+ } else if (arg_count > 1) {
+ RTE_LOG(ERR, EAL,
+ "MAC address can be specified only once for bonded device %s\n",
+ name);
+ return -1;
+ }
+
+ /* Parse/set balance mode transmit policy */
+ arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
+ if (arg_count == 1) {
+ uint8_t xmit_policy;
+
+ if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
+ &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
+ 0) {
+ RTE_LOG(INFO, EAL,
+ "Invalid xmit policy specified for bonded device %s\n",
+ name);
+ return -1;
+ }
+
+ /* Set balance mode transmit policy*/
+ if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to set balance xmit policy on bonded device %s\n",
+ name);
+ return -1;
+ }
+ } else if (arg_count > 1) {
+ RTE_LOG(ERR, EAL,
+ "Transmit policy can be specified only once for bonded device"
+ " %s\n", name);
+ return -1;
+ }
+
+ /* Parse/add slave ports to bonded device */
+ if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
+ struct bond_ethdev_slave_ports slave_ports;
+ unsigned i;
+
+ memset(&slave_ports, 0, sizeof(slave_ports));
+
+ if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
+ &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to parse slave ports for bonded device %s\n",
+ name);
+ return -1;
+ }
+
+ /* A slave that cannot be added is only logged; the others are still
+ * tried and the configure step carries on */
+ for (i = 0; i < slave_ports.slave_count; i++) {
+ if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to add port %d as slave to bonded device %s\n",
+ slave_ports.slaves[i], name);
+ }
+ }
+
+ } else {
+ RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
+ return -1;
+ }
+
+ /* Parse/set primary slave port id*/
+ arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
+ if (arg_count == 1) {
+ uint8_t primary_slave_port_id;
+
+ if (rte_kvargs_process(kvlist,
+ PMD_BOND_PRIMARY_SLAVE_KVARG,
+ &bond_ethdev_parse_primary_slave_port_id_kvarg,
+ &primary_slave_port_id) < 0) {
+ RTE_LOG(INFO, EAL,
+ "Invalid primary slave port id specified for bonded device"
+ " %s\n", name);
+ return -1;
+ }
+
+ /* Set primary slave port */
+ if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
+ != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to set primary slave port %d on bonded device %s\n",
+ primary_slave_port_id, name);
+ return -1;
+ }
+ } else if (arg_count > 1) {
+ RTE_LOG(INFO, EAL,
+ "Primary slave can be specified only once for bonded device"
+ " %s\n", name);
+ return -1;
+ }
+
+ /* Parse link status monitor polling interval */
+ arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
+ if (arg_count == 1) {
+ uint32_t lsc_poll_interval_ms;
+
+ if (rte_kvargs_process(kvlist,
+ PMD_BOND_LSC_POLL_PERIOD_KVARG,
+ &bond_ethdev_parse_time_ms_kvarg,
+ &lsc_poll_interval_ms) < 0) {
+ RTE_LOG(INFO, EAL,
+ "Invalid lsc polling interval value specified for bonded"
+ " device %s\n", name);
+ return -1;
+ }
+
+ /* Set link status monitor polling interval */
+ if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
+ != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to set lsc monitor polling interval (%u ms) on"
+ " bonded device %s\n", lsc_poll_interval_ms, name);
+ return -1;
+ }
+ } else if (arg_count > 1) {
+ RTE_LOG(INFO, EAL,
+ "LSC polling interval can be specified only once for bonded"
+ " device %s\n", name);
+ return -1;
+ }
+
+ /* Parse link up interrupt propagation delay */
+ arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
+ if (arg_count == 1) {
+ uint32_t link_up_delay_ms;
+
+ if (rte_kvargs_process(kvlist,
+ PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
+ &bond_ethdev_parse_time_ms_kvarg,
+ &link_up_delay_ms) < 0) {
+ RTE_LOG(INFO, EAL,
+ "Invalid link up propagation delay value specified for"
+ " bonded device %s\n", name);
+ return -1;
+ }
+
+ /* Set link up propagation delay */
+ if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
+ != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to set link up propagation delay (%u ms) on bonded"
+ " device %s\n", link_up_delay_ms, name);
+ return -1;
+ }
+ } else if (arg_count > 1) {
+ RTE_LOG(INFO, EAL,
+ "Link up propagation delay can be specified only once for"
+ " bonded device %s\n", name);
+ return -1;
+ }
+
+ /* Parse link down interrupt propagation delay */
+ arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
+ if (arg_count == 1) {
+ uint32_t link_down_delay_ms;
+
+ if (rte_kvargs_process(kvlist,
+ PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
+ &bond_ethdev_parse_time_ms_kvarg,
+ &link_down_delay_ms) < 0) {
+ RTE_LOG(INFO, EAL,
+ "Invalid link down propagation delay value specified for"
+ " bonded device %s\n", name);
+ return -1;
+ }
+
+ /* Set link down propagation delay */
+ if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
+ != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to set link down propagation delay (%u ms) on"
+ " bonded device %s\n", link_down_delay_ms, name);
+ return -1;
+ }
+ } else if (arg_count > 1) {
+ RTE_LOG(INFO, EAL,
+ "Link down propagation delay can be specified only once for"
+ " bonded device %s\n", name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Driver descriptor for the bonding PMD: a virtual device driver named
+ * "eth_bond" whose init hook is bond_init(). */
+static struct rte_driver bond_drv = {
+ .name = "eth_bond",
+ .type = PMD_VDEV,
+ .init = bond_init,
+};
+
+/* Register the descriptor through the PMD registration macro */
+PMD_REGISTER_DRIVER(bond_drv);
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_BOND_PRIVATE_H_
+#define _RTE_ETH_BOND_PRIVATE_H_
+
+#include <rte_ethdev.h>
+#include <rte_spinlock.h>
+
+#include "rte_eth_bond.h"
+#include "rte_eth_bond_8023ad_private.h"
+#include "rte_eth_bond_alb.h"
+
+/* Keys of the bonded vdev argument string, used by the PMD with
+ * rte_kvargs_count() and rte_kvargs_process() */
+#define PMD_BOND_SLAVE_PORT_KVARG ("slave")
+#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
+#define PMD_BOND_MODE_KVARG ("mode")
+#define PMD_BOND_XMIT_POLICY_KVARG ("xmit_policy")
+#define PMD_BOND_SOCKET_ID_KVARG ("socket_id")
+#define PMD_BOND_MAC_ADDR_KVARG ("mac")
+#define PMD_BOND_LSC_POLL_PERIOD_KVARG ("lsc_poll_period_ms")
+#define PMD_BOND_LINK_UP_PROP_DELAY_KVARG ("up_delay")
+#define PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG ("down_delay")
+
+/* NOTE(review): presumably the values accepted for the xmit_policy
+ * argument - confirm against the xmit policy kvarg parser */
+#define PMD_BOND_XMIT_POLICY_LAYER2_KVARG ("l2")
+#define PMD_BOND_XMIT_POLICY_LAYER23_KVARG ("l23")
+#define PMD_BOND_XMIT_POLICY_LAYER34_KVARG ("l34")
+
+/* Log msg at level lvl with the PMD log type. The message is prefixed with
+ * the calling function name and line number and a newline is appended, so
+ * msg must be a string literal without a trailing newline. */
+#define RTE_BOND_LOG(lvl, msg, ...) \
+ RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__)
+
+/* NOTE(review): presumably a marker for "no valid bonding mode" - confirm
+ * against its users */
+#define BONDING_MODE_INVALID 0xFF
+
+/* Argument keys accepted by rte_kvargs_parse() in bond_init() */
+extern const char *pmd_bond_init_valid_arguments[];
+
+/* Driver name reported through rte_eth_dev_info by bond_ethdev_info() */
+extern const char *driver_name;
+
+/** Port Queue Mapping Structure: RX queue context filled in by the PMD's
+ * rx_queue_setup callback and stored in dev->data->rx_queues[] */
+struct bond_rx_queue {
+ uint16_t queue_id;
+ /**< Queue Id */
+ struct bond_dev_private *dev_private;
+ /**< Reference to eth_dev private structure */
+ uint16_t nb_rx_desc;
+ /**< Number of RX descriptors available for the queue */
+ struct rte_eth_rxconf rx_conf;
+ /**< Copy of RX configuration structure for queue */
+ struct rte_mempool *mb_pool;
+ /**< Reference to mbuf pool to use for RX queue */
+};
+
+/** TX queue context filled in by the PMD's tx_queue_setup callback and
+ * stored in dev->data->tx_queues[] */
+struct bond_tx_queue {
+ uint16_t queue_id;
+ /**< Queue Id */
+ struct bond_dev_private *dev_private;
+ /**< Reference to dev private structure */
+ uint16_t nb_tx_desc;
+ /**< Number of TX descriptors available for the queue */
+ struct rte_eth_txconf tx_conf;
+ /**< Copy of TX configuration structure for queue */
+};
+
+/** Bonded slave devices structure: list of slave port ids collected by the
+ * slave port kvarg parser while the vdev arguments are processed */
+struct bond_ethdev_slave_ports {
+ uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
+ uint8_t slave_count; /**< Number of slaves */
+};
+
+/** Per-slave state kept by the bonded device */
+struct bond_slave_details {
+ uint8_t port_id;
+ /**< Port Id of slave eth_dev */
+
+ uint8_t link_status_poll_enabled;
+ /**< Non-zero if the link status monitor has to poll this slave */
+ uint8_t link_status_wait_to_complete;
+ /**< wait_to_complete value passed to the slave's link_update when polled */
+ uint8_t last_link_status;
+ /**< Link status recorded by the link status monitor at its last poll */
+ struct ether_addr persisted_mac_addr;
+ /**< NOTE(review): presumably the slave's own MAC address saved while it is
+ * part of the bond - confirm against slave add/remove */
+};
+
+
+/** Transmit policy hash function type, see the xmit_hash member of
+ * struct bond_dev_private. xmit_l2_hash(), xmit_l23_hash() and
+ * xmit_l34_hash() have this signature. */
+typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
+
+/** Link Bonding PMD device private configuration Structure */
+struct bond_dev_private {
+ uint8_t port_id; /**< Port Id of Bonded Port */
+ uint8_t mode; /**< Link Bonding Mode */
+
+ rte_spinlock_t lock;
+ /**< Taken with trylock by the link status monitor around its slave scan */
+
+ uint8_t primary_port; /**< Primary Slave Port */
+ uint8_t current_primary_port; /**< Primary Slave Port currently in use */
+ uint8_t user_defined_primary_port;
+ /**< Flag for whether primary port is user defined or not */
+
+ uint8_t balance_xmit_policy;
+ /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */
+ xmit_hash_t xmit_hash;
+ /**< Transmit policy hash function */
+
+ uint8_t user_defined_mac;
+ /**< Flag for whether MAC address is user defined or not */
+ uint8_t promiscuous_en;
+ /**< Enabled/disable promiscuous mode on bonding device */
+ uint8_t link_props_set;
+ /**< flag to denote if the link properties are set */
+
+ uint8_t link_status_polling_enabled;
+ /**< Non-zero while the link status monitor is allowed to run */
+ uint32_t link_status_polling_interval_ms;
+ /**< Period of the link status monitor in milliseconds */
+
+ uint32_t link_down_delay_ms;
+ /**< Delay before a link down event is propagated, 0 for no delay */
+ uint32_t link_up_delay_ms;
+ /**< Delay before a link up event is propagated, 0 for no delay */
+
+ uint16_t nb_rx_queues; /**< Total number of rx queues */
+ uint16_t nb_tx_queues; /**< Total number of tx queues*/
+
+ uint8_t active_slave_count; /**< Number of active slaves */
+ uint8_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */
+
+ uint8_t slave_count; /**< Number of bonded slaves */
+ struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
+ /**< Array of bonded slaves details */
+
+ struct mode8023ad_private mode4;
+ uint8_t tlb_slaves_order[RTE_MAX_ETHPORTS]; /* TLB active slaves send order */
+ struct mode_alb_private mode6;
+
+ uint32_t rx_offload_capa; /**< Rx offload capability */
+ uint32_t tx_offload_capa; /**< Tx offload capability */
+
+ struct rte_kvargs *kvlist;
+ /**< vdev argument list kept for the configure step, NULL when the device
+ * was created through the bonding API */
+ uint8_t slave_update_idx;
+};
+
+/* Ethdev operations implemented by the bonding PMD */
+extern struct eth_dev_ops default_dev_ops;
+
+/* Validity check for a bonded ethdev. The link status change handler treats
+ * a non-zero return value as "not valid" and ignores the event. */
+int
+valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
+
+/* Look for slave_id in the first slaves_count entries of the slaves array.
+ * Return the position of the first match, or slaves_count if slave_id is not
+ * in the array. */
+static inline uint8_t
+find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) {
+
+	uint8_t idx = 0;
+
+	while (idx < slaves_count && slaves[idx] != slave_id)
+		idx++;
+
+	return idx;
+}
+
+/* NOTE(review): the three port id checks below presumably follow the same
+ * "0 means valid" convention as valid_bonded_ethdev() - confirm */
+int
+valid_port_id(uint8_t port_id);
+
+int
+valid_bonded_port_id(uint8_t port_id);
+
+int
+valid_slave_port_id(uint8_t port_id);
+
+/* Called by the link status change handler to remove port_id from the active
+ * slave list when its link goes down */
+void
+deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
+
+/* Called by the link status change handler when the link of an inactive
+ * slave comes up */
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
+
+/* Called by the link status change handler so that the bonded device
+ * inherits the link properties of its first active slave */
+void
+link_properties_set(struct rte_eth_dev *bonded_eth_dev,
+ struct rte_eth_link *slave_dev_link);
+/* Called by the link status change handler when no active slave is left */
+void
+link_properties_reset(struct rte_eth_dev *bonded_eth_dev);
+
+int
+link_properties_valid(struct rte_eth_link *bonded_dev_link,
+ struct rte_eth_link *slave_dev_link);
+
+int
+mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
+
+int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
+
+/* Called by the link status change handler when the first slave becomes
+ * active */
+int
+mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
+
+uint8_t
+number_of_sockets(void);
+
+int
+bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode);
+
+int
+slave_configure(struct rte_eth_dev *bonded_eth_dev,
+ struct rte_eth_dev *slave_eth_dev);
+
+void
+slave_remove(struct bond_dev_private *internals,
+ struct rte_eth_dev *slave_eth_dev);
+
+void
+slave_add(struct bond_dev_private *internals,
+ struct rte_eth_dev *slave_eth_dev);
+
+/* Transmit hash functions matching xmit_hash_t */
+uint16_t
+xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+
+uint16_t
+xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+
+uint16_t
+xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+
+/* Used by the link status change handler to switch the primary slave */
+void
+bond_ethdev_primary_set(struct bond_dev_private *internals,
+ uint8_t slave_port_id);
+
+/* Link status change handler for slave port_id; param must point to the
+ * uint8_t port id of the bonded device and type must be
+ * RTE_ETH_EVENT_INTR_LSC, otherwise the event is ignored */
+void
+bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
+ void *param);
+
+/* Handlers passed to rte_kvargs_process() for the bonded vdev arguments.
+ * value is the argument text and extra_args the destination supplied by the
+ * caller; the destination type used by the PMD is noted on each handler.
+ * The PMD treats a non-zero (or negative) return as a parse failure. */
+
+/* extra_args: struct bond_ethdev_slave_ports * */
+int
+bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* extra_args: uint8_t * receiving the bonding mode */
+int
+bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* extra_args: uint8_t * receiving the socket id */
+int
+bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* extra_args: uint8_t * receiving the primary slave port id */
+int
+bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* extra_args: uint8_t * receiving the balance transmit policy */
+int
+bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* extra_args: struct ether_addr * receiving the bonded device MAC address */
+int
+bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* extra_args: uint32_t * receiving a time in milliseconds */
+int
+bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused,
+ const char *value, void *extra_args);
+
+/* Called when a bonded device in TLB or ALB mode is stopped */
+void
+bond_tlb_disable(struct bond_dev_private *internals);
+
+/* NOTE(review): presumably the counterpart of bond_tlb_disable() - confirm */
+void
+bond_tlb_enable(struct bond_dev_private *internals);
+
+void
+bond_tlb_activate_slave(struct bond_dev_private *internals);
+
+#endif
--- /dev/null
+DPDK_2.0 {
+ global:
+
+ rte_eth_bond_8023ad_conf_get;
+ rte_eth_bond_8023ad_setup;
+ rte_eth_bond_active_slaves_get;
+ rte_eth_bond_create;
+ rte_eth_bond_link_monitoring_set;
+ rte_eth_bond_mac_address_reset;
+ rte_eth_bond_mac_address_set;
+ rte_eth_bond_mode_get;
+ rte_eth_bond_mode_set;
+ rte_eth_bond_primary_get;
+ rte_eth_bond_primary_set;
+ rte_eth_bond_slave_add;
+ rte_eth_bond_slave_remove;
+ rte_eth_bond_slaves_get;
+ rte_eth_bond_xmit_policy_get;
+ rte_eth_bond_xmit_policy_set;
+
+ local: *;
+};
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k
DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic
-DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
+++ /dev/null
-# BSD LICENSE
-#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# library name
-#
-LIB = librte_pmd_bond.a
-
-CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
-
-EXPORT_MAP := rte_eth_bond_version.map
-
-LIBABIVER := 1
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c
-
-#
-# Export include files
-#
-SYMLINK-y-include += rte_eth_bond.h
-SYMLINK-y-include += rte_eth_bond_8023ad.h
-
-# this lib depends upon:
-DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_mbuf
-DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_ether
-DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_malloc
-DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_eal
-DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += lib/librte_kvargs
-
-include $(RTE_SDK)/mk/rte.lib.mk
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_ETH_BOND_H_
-#define _RTE_ETH_BOND_H_
-
-/**
- * @file rte_eth_bond.h
- *
- * RTE Link Bonding Ethernet Device
- * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple
- * (slave) NICs into a single logical interface. The bonded device processes
- * these interfaces based on the mode of operation specified and supported.
- * This implementation supports 4 modes of operation round robin, active backup
- * balance and broadcast. Providing redundant links, fault tolerance and/or
- * load balancing of network ports
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <rte_ether.h>
-
-/* Supported modes of operation of link bonding library */
-
-#define BONDING_MODE_ROUND_ROBIN (0)
-/**< Round Robin (Mode 0).
- * In this mode all transmitted packets will be balanced equally across all
- * active slaves of the bonded in a round robin fashion. */
-#define BONDING_MODE_ACTIVE_BACKUP (1)
-/**< Active Backup (Mode 1).
- * In this mode all packets transmitted will be transmitted on the primary
- * slave until such point as the primary slave is no longer available and then
- * transmitted packets will be sent on the next available slaves. The primary
- * slave can be defined by the user but defaults to the first active slave
- * available if not specified. */
-#define BONDING_MODE_BALANCE (2)
-/**< Balance (Mode 2).
- * In this mode all packets transmitted will be balanced across the available
- * slaves using one of three available transmit policies - l2, l2+3 or l3+4.
- * See BALANCE_XMIT_POLICY macros definitions for further details on transmit
- * policies. */
-#define BONDING_MODE_BROADCAST (3)
-/**< Broadcast (Mode 3).
- * In this mode all transmitted packets will be transmitted on all available
- * active slaves of the bonded. */
-#define BONDING_MODE_8023AD (4)
-/**< 802.3AD (Mode 4).
- *
- * This mode provides auto negotiation/configuration
- * of peers and well as link status changes monitoring using out of band
- * LACP (link aggregation control protocol) messages. For further details of
- * LACP specification see the IEEE 802.3ad/802.1AX standards. It is also
- * described here
- * https://www.kernel.org/doc/Documentation/networking/bonding.txt.
- *
- * Important Usage Notes:
- * - for LACP mode to work the rx/tx burst functions must be invoked
- * at least once every 100ms, otherwise the out-of-band LACP messages will not
- * be handled with the expected latency and this may cause the link status to be
- * incorrectly marked as down or failure to correctly negotiate with peers.
- * - For optimal performance during initial handshaking the array of mbufs provided
- * to rx_burst should be at least 2 times the slave count size.
- *
- */
-#define BONDING_MODE_TLB (5)
-/**< Adaptive TLB (Mode 5)
- * This mode provides an adaptive transmit load balancing. It dynamically
- * changes the transmitting slave, according to the computed load. Statistics
- * are collected in 100ms intervals and scheduled every 10ms */
-#define BONDING_MODE_ALB (6)
-/**< Adaptive Load Balancing (Mode 6)
- * This mode includes adaptive TLB and receive load balancing (RLB). In RLB the
- * bonding driver intercepts ARP replies send by local system and overwrites its
- * source MAC address, so that different peers send data to the server on
- * different slave interfaces. When local system sends ARP request, it saves IP
- * information from it. When ARP reply from that peer is received, its MAC is
- * stored, one of slave MACs assigned and ARP reply send to that peer.
- */
-
-/* Balance Mode Transmit Policies */
-#define BALANCE_XMIT_POLICY_LAYER2 (0)
-/**< Layer 2 (Ethernet MAC) */
-#define BALANCE_XMIT_POLICY_LAYER23 (1)
-/**< Layer 2+3 (Ethernet MAC + IP Addresses) transmit load balancing */
-#define BALANCE_XMIT_POLICY_LAYER34 (2)
-/**< Layer 3+4 (IP Addresses + UDP Ports) transmit load balancing */
-
-/**
- * Create a bonded rte_eth_dev device
- *
- * @param name Name of new link bonding device.
- * @param mode Mode to initialize bonding device in.
- * @param socket_id Socket Id on which to allocate eth_dev resources.
- *
- * @return
- * Port Id of created rte_eth_dev on success, negative value otherwise
- */
-int
-rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id);
-
-/**
- * Add a rte_eth_dev device as a slave to the bonded device
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param slave_port_id Port ID of slave device.
- *
- * @return
- * 0 on success, negative value otherwise
- */
-int
-rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id);
-
-/**
- * Remove a slave rte_eth_dev device from the bonded device
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param slave_port_id Port ID of slave device.
- *
- * @return
- * 0 on success, negative value otherwise
- */
-int
-rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id);
-
-/**
- * Set link bonding mode of bonded device
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param mode Bonding mode to set
- *
- * @return
- * 0 on success, negative value otherwise
- */
-int
-rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode);
-
-/**
- * Get link bonding mode of bonded device
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * link bonding mode on success, negative value otherwise
- */
-int
-rte_eth_bond_mode_get(uint8_t bonded_port_id);
-
-/**
- * Set slave rte_eth_dev as primary slave of bonded device
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param slave_port_id Port ID of slave device.
- *
- * @return
- * 0 on success, negative value otherwise
- */
-int
-rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id);
-
-/**
- * Get primary slave of bonded device
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * Port Id of primary slave on success, -1 on failure
- */
-int
-rte_eth_bond_primary_get(uint8_t bonded_port_id);
-
-/**
- * Populate an array with list of the slaves port id's of the bonded device
- *
- * @param bonded_port_id Port ID of bonded eth_dev to interrogate
- * @param slaves Array to be populated with the current active slaves
- * @param len Length of slaves array
- *
- * @return
- * Number of slaves associated with bonded device on success,
- * negative value otherwise
- */
-int
-rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len);
-
-/**
- * Populate an array with list of the active slaves port id's of the bonded
- * device.
- *
- * @param bonded_port_id Port ID of bonded eth_dev to interrogate
- * @param slaves Array to be populated with the current active slaves
- * @param len Length of slaves array
- *
- * @return
- * Number of active slaves associated with bonded device on success,
- * negative value otherwise
- */
-int
-rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
- uint8_t len);
-
-/**
- * Set explicit MAC address to use on bonded device and it's slaves.
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param mac_addr MAC Address to use on bonded device overriding
- * slaves MAC addresses
- *
- * @return
- * 0 on success, negative value otherwise
- */
-int
-rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
- struct ether_addr *mac_addr);
-
-/**
- * Reset bonded device to use MAC from primary slave on bonded device and it's
- * slaves.
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * 0 on success, negative value otherwise
- */
-int
-rte_eth_bond_mac_address_reset(uint8_t bonded_port_id);
-
-/**
- * Set the transmit policy for bonded device to use when it is operating in
- * balance mode, this parameter is otherwise ignored in other modes of
- * operation.
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param policy Balance mode transmission policy.
- *
- * @return
- * 0 on success, negative value otherwise.
- */
-int
-rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy);
-
-/**
- * Get the transmit policy set on bonded device for balance mode operation
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * Balance transmit policy on success, negative value otherwise.
- */
-int
-rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id);
-
-/**
- * Set the link monitoring frequency (in ms) for monitoring the link status of
- * slave devices
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param internal_ms Monitoring interval in milliseconds
- *
- * @return
- * 0 on success, negative value otherwise.
- */
-
-int
-rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms);
-
-/**
- * Get the current link monitoring frequency (in ms) for monitoring of the link
- * status of slave devices
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * Monitoring interval on success, negative value otherwise.
- */
-int
-rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id);
-
-
-/**
- * Set the period in milliseconds for delaying the disabling of a bonded link
- * when the link down status has been detected
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param delay_ms Delay period in milliseconds.
- *
- * @return
- * 0 on success, negative value otherwise.
- */
-int
-rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
-
-/**
- * Get the period in milliseconds set for delaying the disabling of a bonded
- * link when the link down status has been detected
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * Delay period on success, negative value otherwise.
- */
-int
-rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id);
-
-/**
- * Set the period in milliseconds for delaying the enabling of a bonded link
- * when the link up status has been detected
- *
- * @param bonded_port_id Port ID of bonded device.
- * @param delay_ms Delay period in milliseconds.
- *
- * @return
- * 0 on success, negative value otherwise.
- */
-int
-rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
-
-/**
- * Get the period in milliseconds set for delaying the enabling of a bonded
- * link when the link up status has been detected
- *
- * @param bonded_port_id Port ID of bonded device.
- *
- * @return
- * Delay period on success, negative value otherwise.
- */
-int
-rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stddef.h>
-#include <string.h>
-#include <stdbool.h>
-
-#include <rte_alarm.h>
-#include <rte_malloc.h>
-#include <rte_errno.h>
-#include <rte_cycles.h>
-
-#include "rte_eth_bond_private.h"
-
-#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
-#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
- bond_dbg_get_time_diff_ms(), slave_id, \
- __func__, ##__VA_ARGS__)
-
-static uint64_t start_time;
-
-static unsigned
-bond_dbg_get_time_diff_ms(void)
-{
- uint64_t now;
-
- now = rte_rdtsc();
- if (start_time == 0)
- start_time = now;
-
- return ((now - start_time) * 1000) / rte_get_tsc_hz();
-}
-
-static void
-bond_print_lacp(struct lacpdu *l)
-{
- char a_address[18];
- char p_address[18];
- char a_state[256] = { 0 };
- char p_state[256] = { 0 };
-
- static const char * const state_labels[] = {
- "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
- };
-
- int a_len = 0;
- int p_len = 0;
- uint8_t i;
- uint8_t *addr;
-
- addr = l->actor.port_params.system.addr_bytes;
- snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
- addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
-
- addr = l->partner.port_params.system.addr_bytes;
- snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
- addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
-
- for (i = 0; i < 8; i++) {
- if ((l->actor.state >> i) & 1) {
- a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
- state_labels[i]);
- }
-
- if ((l->partner.state >> i) & 1) {
- p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
- state_labels[i]);
- }
- }
-
- if (a_len && a_state[a_len-1] == ' ')
- a_state[a_len-1] = '\0';
-
- if (p_len && p_state[p_len-1] == ' ')
- p_state[p_len-1] = '\0';
-
- RTE_LOG(DEBUG, PMD, "LACP: {\n"\
- " subtype= %02X\n"\
- " ver_num=%02X\n"\
- " actor={ tlv=%02X, len=%02X\n"\
- " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
- " state={ %s }\n"\
- " }\n"\
- " partner={ tlv=%02X, len=%02X\n"\
- " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
- " state={ %s }\n"\
- " }\n"\
- " collector={info=%02X, length=%02X, max_delay=%04X\n, " \
- "type_term=%02X, terminator_length = %02X}\n",\
- l->subtype,\
- l->version_number,\
- l->actor.tlv_type_info,\
- l->actor.info_length,\
- l->actor.port_params.system_priority,\
- a_address,\
- l->actor.port_params.key,\
- l->actor.port_params.port_priority,\
- l->actor.port_params.port_number,\
- a_state,\
- l->partner.tlv_type_info,\
- l->partner.info_length,\
- l->partner.port_params.system_priority,\
- p_address,\
- l->partner.port_params.key,\
- l->partner.port_params.port_priority,\
- l->partner.port_params.port_number,\
- p_state,\
- l->tlv_type_collector_info,\
- l->collector_info_length,\
- l->collector_max_delay,\
- l->tlv_type_terminator,\
- l->terminator_length);
-
-}
-#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
-#else
-#define BOND_PRINT_LACP(lacpdu) do { } while (0)
-#define MODE4_DEBUG(fmt, ...) do { } while (0)
-#endif
-
-static const struct ether_addr lacp_mac_addr = {
- .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
-};
-
-struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
-
-static void
-timer_cancel(uint64_t *timer)
-{
- *timer = 0;
-}
-
-static void
-timer_set(uint64_t *timer, uint64_t timeout)
-{
- *timer = rte_rdtsc() + timeout;
-}
-
-/* Forces given timer to be in expired state. */
-static void
-timer_force_expired(uint64_t *timer)
-{
- *timer = rte_rdtsc();
-}
-
-static bool
-timer_is_stopped(uint64_t *timer)
-{
- return *timer == 0;
-}
-
-static bool
-timer_is_expired(uint64_t *timer)
-{
- return *timer < rte_rdtsc();
-}
-
-/* Timer is in running state if it is not stopped nor expired */
-static bool
-timer_is_running(uint64_t *timer)
-{
- return !timer_is_stopped(timer) && !timer_is_expired(timer);
-}
-
-static void
-set_warning_flags(struct port *port, uint16_t flags)
-{
- int retval;
- uint16_t old;
- uint16_t new_flag = 0;
-
- do {
- old = port->warnings_to_show;
- new_flag = old | flags;
- retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
- } while (unlikely(retval == 0));
-}
-
-static void
-show_warnings(uint8_t slave_id)
-{
- struct port *port = &mode_8023ad_ports[slave_id];
- uint8_t warnings;
-
- do {
- warnings = port->warnings_to_show;
- } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
-
- if (!warnings)
- return;
-
- if (!timer_is_expired(&port->warning_timer))
- return;
-
-
- timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
- rte_get_tsc_hz() / 1000);
-
- if (warnings & WRN_RX_QUEUE_FULL) {
- RTE_LOG(DEBUG, PMD,
- "Slave %u: failed to enqueue LACP packet into RX ring.\n"
- "Receive and transmit functions must be invoked on bonded\n"
- "interface at least 10 times per second or LACP will not\n"
- "work correctly\n", slave_id);
- }
-
- if (warnings & WRN_TX_QUEUE_FULL) {
- RTE_LOG(DEBUG, PMD,
- "Slave %u: failed to enqueue LACP packet into TX ring.\n"
- "Receive and transmit functions must be invoked on bonded\n"
- "interface at least 10 times per second or LACP will not\n"
- "work correctly\n", slave_id);
- }
-
- if (warnings & WRN_RX_MARKER_TO_FAST)
- RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id);
-
- if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
- RTE_LOG(INFO, PMD,
- "Slave %u: ignoring unknown slow protocol frame type", slave_id);
- }
-
- if (warnings & WRN_UNKNOWN_MARKER_TYPE)
- RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type", slave_id);
-
- if (warnings & WRN_NOT_LACP_CAPABLE)
- MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
-}
-
-static void
-record_default(struct port *port)
-{
- /* Record default parameters for partner. Partner admin parameters
- * are not implemented so set them to arbitrary default (last known) and
- * mark actor that parner is in defaulted state. */
- port->partner_state = STATE_LACP_ACTIVE;
- ACTOR_STATE_SET(port, DEFAULTED);
-}
-
-/** Function handles rx state machine.
- *
- * This function implements Receive State Machine from point 5.4.12 in
- * 802.1AX documentation. It should be called periodically.
- *
- * @param lacpdu LACPDU received.
- * @param port Port on which LACPDU was received.
- */
-static void
-rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
- struct lacpdu *lacp)
-{
- struct port *agg, *port = &mode_8023ad_ports[slave_id];
- uint64_t timeout;
-
- if (SM_FLAG(port, BEGIN)) {
- /* Initialize stuff */
- MODE4_DEBUG("-> INITIALIZE\n");
- SM_FLAG_CLR(port, MOVED);
- port->selected = UNSELECTED;
-
- record_default(port);
-
- ACTOR_STATE_CLR(port, EXPIRED);
- timer_cancel(&port->current_while_timer);
-
- /* DISABLED: On initialization partner is out of sync */
- PARTNER_STATE_CLR(port, SYNCHRONIZATION);
-
- /* LACP DISABLED stuff if LACP not enabled on this port */
- if (!SM_FLAG(port, LACP_ENABLED))
- PARTNER_STATE_CLR(port, AGGREGATION);
- else
- PARTNER_STATE_SET(port, AGGREGATION);
- }
-
- if (!SM_FLAG(port, LACP_ENABLED)) {
- /* Update parameters only if state changed */
- if (!timer_is_stopped(&port->current_while_timer)) {
- port->selected = UNSELECTED;
- record_default(port);
- PARTNER_STATE_CLR(port, AGGREGATION);
- ACTOR_STATE_CLR(port, EXPIRED);
- timer_cancel(&port->current_while_timer);
- }
- return;
- }
-
- if (lacp) {
- MODE4_DEBUG("LACP -> CURRENT\n");
- BOND_PRINT_LACP(lacp);
- /* Update selected flag. If partner parameters are defaulted assume they
- * are match. If not defaulted compare LACP actor with ports parner
- * params. */
- if (!ACTOR_STATE(port, DEFAULTED) &&
- (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
- || memcmp(&port->partner, &lacp->actor.port_params,
- sizeof(port->partner)) != 0)) {
- MODE4_DEBUG("selected <- UNSELECTED\n");
- port->selected = UNSELECTED;
- }
-
- /* Record this PDU actor params as partner params */
- memcpy(&port->partner, &lacp->actor.port_params,
- sizeof(struct port_params));
- port->partner_state = lacp->actor.state;
-
- /* Partner parameters are not defaulted any more */
- ACTOR_STATE_CLR(port, DEFAULTED);
-
- /* If LACP partner params match this port actor params */
- agg = &mode_8023ad_ports[port->aggregator_port_id];
- bool match = port->actor.system_priority ==
- lacp->partner.port_params.system_priority &&
- is_same_ether_addr(&agg->actor.system,
- &lacp->partner.port_params.system) &&
- port->actor.port_priority ==
- lacp->partner.port_params.port_priority &&
- port->actor.port_number ==
- lacp->partner.port_params.port_number;
-
- /* Update NTT if partners information are outdated (xored and masked
- * bits are set)*/
- uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
- STATE_SYNCHRONIZATION | STATE_AGGREGATION;
-
- if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
- match == false) {
- SM_FLAG_SET(port, NTT);
- }
-
- /* If LACP partner params match this port actor params */
- if (match == true && ACTOR_STATE(port, AGGREGATION) ==
- PARTNER_STATE(port, AGGREGATION))
- PARTNER_STATE_SET(port, SYNCHRONIZATION);
- else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
- AGGREGATION))
- PARTNER_STATE_SET(port, SYNCHRONIZATION);
- else
- PARTNER_STATE_CLR(port, SYNCHRONIZATION);
-
- if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
- timeout = internals->mode4.short_timeout;
- else
- timeout = internals->mode4.long_timeout;
-
- timer_set(&port->current_while_timer, timeout);
- ACTOR_STATE_CLR(port, EXPIRED);
- return; /* No state change */
- }
-
- /* If CURRENT state timer is not running (stopped or expired)
- * transit to EXPIRED state from DISABLED or CURRENT */
- if (!timer_is_running(&port->current_while_timer)) {
- ACTOR_STATE_SET(port, EXPIRED);
- PARTNER_STATE_CLR(port, SYNCHRONIZATION);
- PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
- timer_set(&port->current_while_timer, internals->mode4.short_timeout);
- }
-}
-
-/**
- * Function handles periodic tx state machine.
- *
- * Function implements Periodic Transmission state machine from point 5.4.13
- * in 802.1AX documentation. It should be called periodically.
- *
- * @param port Port to handle state machine.
- */
-static void
-periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
-{
- struct port *port = &mode_8023ad_ports[slave_id];
- /* Calculate if either site is LACP enabled */
- uint64_t timeout;
- uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
- PARTNER_STATE(port, LACP_ACTIVE);
-
- uint8_t is_partner_fast, was_partner_fast;
- /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */
- if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
- timer_cancel(&port->periodic_timer);
- timer_force_expired(&port->tx_machine_timer);
- SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
-
- MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
- SM_FLAG(port, BEGIN) ? "begind " : "",
- SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
- active ? "LACP active " : "LACP pasive ");
- return;
- }
-
- is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
- was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
-
- /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
- * Other case: check if timer expire or partners settings changed. */
- if (!timer_is_stopped(&port->periodic_timer)) {
- if (timer_is_expired(&port->periodic_timer)) {
- SM_FLAG_SET(port, NTT);
- } else if (is_partner_fast != was_partner_fast) {
- /* Partners timeout was slow and now it is fast -> send LACP.
- * In other case (was fast and now it is slow) just switch
- * timeout to slow without forcing send of LACP (because standard
- * say so)*/
- if (!is_partner_fast)
- SM_FLAG_SET(port, NTT);
- } else
- return; /* Nothing changed */
- }
-
- /* Handle state transition to FAST/SLOW LACP timeout */
- if (is_partner_fast) {
- timeout = internals->mode4.fast_periodic_timeout;
- SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
- } else {
- timeout = internals->mode4.slow_periodic_timeout;
- SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
- }
-
- timer_set(&port->periodic_timer, timeout);
-}
-
-/**
- * Function handles mux state machine.
- *
- * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
- * It should be called periodically.
- *
- * @param port Port to handle state machine.
- */
-static void
-mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
-{
- struct port *port = &mode_8023ad_ports[slave_id];
-
- /* Save current state for later use */
- const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
- STATE_COLLECTING;
-
- /* Enter DETACHED state on BEGIN condition or from any other state if
- * port was unselected */
- if (SM_FLAG(port, BEGIN) ||
- port->selected == UNSELECTED || (port->selected == STANDBY &&
- (port->actor_state & state_mask) != 0)) {
- /* detach mux from aggregator */
- port->actor_state &= ~state_mask;
- /* Set ntt to true if BEGIN condition or transition from any other state
- * which is indicated that wait_while_timer was started */
- if (SM_FLAG(port, BEGIN) ||
- !timer_is_stopped(&port->wait_while_timer)) {
- SM_FLAG_SET(port, NTT);
- MODE4_DEBUG("-> DETACHED\n");
- }
- timer_cancel(&port->wait_while_timer);
- }
-
- if (timer_is_stopped(&port->wait_while_timer)) {
- if (port->selected == SELECTED || port->selected == STANDBY) {
- timer_set(&port->wait_while_timer,
- internals->mode4.aggregate_wait_timeout);
-
- MODE4_DEBUG("DETACHED -> WAITING\n");
- }
- /* Waiting state entered */
- return;
- }
-
- /* Transit next state if port is ready */
- if (!timer_is_expired(&port->wait_while_timer))
- return;
-
- if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
- !PARTNER_STATE(port, SYNCHRONIZATION)) {
- /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
- * sync transit to ATACHED state. */
- ACTOR_STATE_CLR(port, DISTRIBUTING);
- ACTOR_STATE_CLR(port, COLLECTING);
- /* Clear actor sync to activate transit ATACHED in condition bellow */
- ACTOR_STATE_CLR(port, SYNCHRONIZATION);
- MODE4_DEBUG("Out of sync -> ATTACHED\n");
- }
-
- if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
- /* attach mux to aggregator */
- RTE_VERIFY((port->actor_state & (STATE_COLLECTING |
- STATE_DISTRIBUTING)) == 0);
-
- ACTOR_STATE_SET(port, SYNCHRONIZATION);
- SM_FLAG_SET(port, NTT);
- MODE4_DEBUG("ATTACHED Entered\n");
- } else if (!ACTOR_STATE(port, COLLECTING)) {
- /* Start collecting if in sync */
- if (PARTNER_STATE(port, SYNCHRONIZATION)) {
- MODE4_DEBUG("ATTACHED -> COLLECTING\n");
- ACTOR_STATE_SET(port, COLLECTING);
- SM_FLAG_SET(port, NTT);
- }
- } else if (ACTOR_STATE(port, COLLECTING)) {
- /* Check if partner is in COLLECTING state. If so this port can
- * distribute frames to it */
- if (!ACTOR_STATE(port, DISTRIBUTING)) {
- if (PARTNER_STATE(port, COLLECTING)) {
- /* Enable DISTRIBUTING if partner is collecting */
- ACTOR_STATE_SET(port, DISTRIBUTING);
- SM_FLAG_SET(port, NTT);
- MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
- RTE_LOG(INFO, PMD,
- "Bond %u: slave id %u distributing started.\n",
- internals->port_id, slave_id);
- }
- } else {
- if (!PARTNER_STATE(port, COLLECTING)) {
- /* Disable DISTRIBUTING (enter COLLECTING state) if partner
- * is not collecting */
- ACTOR_STATE_CLR(port, DISTRIBUTING);
- SM_FLAG_SET(port, NTT);
- MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
- RTE_LOG(INFO, PMD,
- "Bond %u: slave id %u distributing stopped.\n",
- internals->port_id, slave_id);
- }
- }
- }
-}
-
-/**
- * Function handles transmit state machine.
- *
- * Function implements Transmit Machine from point 5.4.16 in 802.1AX
- * documentation.
- *
- * @param port
- */
-static void
-tx_machine(struct bond_dev_private *internals, uint8_t slave_id)
-{
- struct port *agg, *port = &mode_8023ad_ports[slave_id];
-
- struct rte_mbuf *lacp_pkt = NULL;
- struct lacpdu_header *hdr;
- struct lacpdu *lacpdu;
-
- /* If periodic timer is not running periodic machine is in NO PERIODIC and
- * according to 802.3ax standard tx machine should not transmit any frames
- * and set ntt to false. */
- if (timer_is_stopped(&port->periodic_timer))
- SM_FLAG_CLR(port, NTT);
-
- if (!SM_FLAG(port, NTT))
- return;
-
- if (!timer_is_expired(&port->tx_machine_timer))
- return;
-
- lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
- if (lacp_pkt == NULL) {
- RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
- return;
- }
-
- lacp_pkt->data_len = sizeof(*hdr);
- lacp_pkt->pkt_len = sizeof(*hdr);
-
- hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
-
- /* Source and destination MAC */
- ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
- rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
- hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
-
- lacpdu = &hdr->lacpdu;
- memset(lacpdu, 0, sizeof(*lacpdu));
-
- /* Initialize LACP part */
- lacpdu->subtype = SLOW_SUBTYPE_LACP;
- lacpdu->version_number = 1;
-
- /* ACTOR */
- lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
- lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
- memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
- sizeof(port->actor));
- agg = &mode_8023ad_ports[port->aggregator_port_id];
- ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
- lacpdu->actor.state = port->actor_state;
-
- /* PARTNER */
- lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
- lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
- memcpy(&lacpdu->partner.port_params, &port->partner,
- sizeof(struct port_params));
- lacpdu->partner.state = port->partner_state;
-
- /* Other fields */
- lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
- lacpdu->collector_info_length = 0x10;
- lacpdu->collector_max_delay = 0;
-
- lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
- lacpdu->terminator_length = 0;
-
- if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
- /* If TX ring full, drop packet and free message. Retransmission
- * will happen in next function call. */
- rte_pktmbuf_free(lacp_pkt);
- set_warning_flags(port, WRN_TX_QUEUE_FULL);
- return;
- }
-
- MODE4_DEBUG("sending LACP frame\n");
- BOND_PRINT_LACP(lacpdu);
-
- timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
- SM_FLAG_CLR(port, NTT);
-}
-
-/**
- * Choose the aggregator for one slave port and mark the port SELECTED.
- *
- * Active slaves that are currently their own aggregator are tried in
- * active-list order. The first one whose actor key, partner system priority,
- * partner system address and partner key equal those of the port is taken,
- * provided the port's partner system address is not all-zero and the
- * candidate's actor key carries the full duplex bit. If none qualifies the
- * port aggregates itself.
- *
- * @param internals	Private data of the bonded device.
- * @param slave_id	Port id of the slave to assign.
- */
-static void
-selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
-{
-	struct port *port = &mode_8023ad_ports[slave_id];
-	uint8_t *active_ids = internals->active_slaves;
-	uint8_t active_count = internals->active_slave_count;
-	/* By default the port uses itself as aggregator. */
-	uint8_t new_agg_id = slave_id;
-	uint8_t idx;
-
-	for (idx = 0; idx < active_count; ++idx) {
-		struct port *candidate = &mode_8023ad_ports[active_ids[idx]];
-
-		/* Skip ports that are not aggregators. */
-		if (candidate->aggregator_port_id != active_ids[idx])
-			continue;
-
-		/* The actor's system ID is not compared since all slave devices
-		 * have the same ID (MAC address). */
-		if (candidate->actor.key != port->actor.key)
-			continue;
-		if (candidate->partner.system_priority !=
-				port->partner.system_priority)
-			continue;
-		if (is_same_ether_addr(&candidate->partner.system,
-				&port->partner.system) != 1)
-			continue;
-		if (candidate->partner.key != port->partner.key)
-			continue;
-		if (is_zero_ether_addr(&port->partner.system) == 1)
-			continue;
-		if ((candidate->actor.key &
-				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0)
-			continue;
-
-		new_agg_id = active_ids[idx];
-		break;
-	}
-
-	if (port->aggregator_port_id != new_agg_id) {
-		port->aggregator_port_id = new_agg_id;
-
-		MODE4_DEBUG("-> SELECTED: ID=%3u\n"
-			"\t%s aggregator ID=%3u\n",
-			port->aggregator_port_id,
-			port->aggregator_port_id == slave_id ?
-				"aggregator not found, using default" : "aggregator found",
-			port->aggregator_port_id);
-	}
-
-	port->selected = SELECTED;
-}
-
-/*
- * Translate a DPDK link speed constant into the speed value stored in the
- * key field. Speeds without a dedicated constant map to 0xFFFF.
- */
-static uint16_t
-link_speed_key(uint16_t speed) {
-	switch (speed) {
-	case ETH_LINK_SPEED_AUTONEG:
-		return 0x00;
-	case ETH_LINK_SPEED_10:
-		return BOND_LINK_SPEED_KEY_10M;
-	case ETH_LINK_SPEED_100:
-		return BOND_LINK_SPEED_KEY_100M;
-	case ETH_LINK_SPEED_1000:
-		return BOND_LINK_SPEED_KEY_1000M;
-	case ETH_LINK_SPEED_10G:
-		return BOND_LINK_SPEED_KEY_10G;
-	case ETH_LINK_SPEED_20G:
-		return BOND_LINK_SPEED_KEY_20G;
-	case ETH_LINK_SPEED_40G:
-		return BOND_LINK_SPEED_KEY_40G;
-	default:
-		/* Unknown speed */
-		return 0xFFFF;
-	}
-}
-
-/*
- * Alarm callback driving the mode 4 state machines of a bonded device.
- *
- * First refreshes the actor key and system address of every active slave
- * from its current link and MAC address, then runs the state machines for
- * every active slave whose key has the full duplex bit set, and finally
- * re-arms itself with the configured update timeout.
- *
- * arg is the bonded device (struct rte_eth_dev *).
- */
-static void
-bond_mode_8023ad_periodic_cb(void *arg)
-{
- struct rte_eth_dev *bond_dev = arg;
- struct bond_dev_private *internals = bond_dev->data->dev_private;
- struct port *port;
- struct rte_eth_link link_info;
- struct ether_addr slave_addr;
-
- void *pkt = NULL;
- uint8_t i, slave_id;
-
-
- /* Update link status on each port */
- for (i = 0; i < internals->active_slave_count; i++) {
- uint16_t key;
-
- slave_id = internals->active_slaves[i];
- rte_eth_link_get(slave_id, &link_info);
- rte_eth_macaddr_get(slave_id, &slave_addr);
-
- /* Key layout: speed value shifted left by one, bit 0 is the full
-  * duplex flag. A link that is down gets key 0. */
- if (link_info.link_status != 0) {
- key = link_speed_key(link_info.link_speed) << 1;
- if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
- key |= BOND_LINK_FULL_DUPLEX_KEY;
- } else
- key = 0;
-
- port = &mode_8023ad_ports[slave_id];
-
- /* The key is stored in big endian, so compare in that byte order. */
- key = rte_cpu_to_be_16(key);
- if (key != port->actor.key) {
- if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
- set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
-
- port->actor.key = key;
- SM_FLAG_SET(port, NTT);
- }
-
- /* Follow MAC address changes; NTT is set only when the port is its
-  * own aggregator. */
- if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
- ether_addr_copy(&slave_addr, &port->actor.system);
- if (port->aggregator_port_id == slave_id)
- SM_FLAG_SET(port, NTT);
- }
- }
-
- /* Run the state machines on each port */
- for (i = 0; i < internals->active_slave_count; i++) {
- slave_id = internals->active_slaves[i];
- port = &mode_8023ad_ports[slave_id];
-
- if ((port->actor.key &
- rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
-
- SM_FLAG_SET(port, BEGIN);
-
- /* LACP is disabled on half duplex or when the link is down */
- if (SM_FLAG(port, LACP_ENABLED)) {
- /* If port was enabled set it to BEGIN state */
- SM_FLAG_CLR(port, LACP_ENABLED);
- ACTOR_STATE_CLR(port, DISTRIBUTING);
- ACTOR_STATE_CLR(port, COLLECTING);
- }
-
- /* Skip this port processing */
- continue;
- }
-
- SM_FLAG_SET(port, LACP_ENABLED);
-
- /* Find LACP packet to this port. Do not check subtype, it is done in
- * function that queued packet */
- if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
- struct rte_mbuf *lacp_pkt = pkt;
- struct lacpdu_header *lacp;
-
- lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
- RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
-
- /* This is LACP frame so pass it to rx_machine */
- rx_machine(internals, slave_id, &lacp->lacpdu);
- rte_pktmbuf_free(lacp_pkt);
- } else
- rx_machine(internals, slave_id, NULL);
-
- /* At most one queued LACPDU is consumed per port per invocation. */
- periodic_machine(internals, slave_id);
- mux_machine(internals, slave_id);
- tx_machine(internals, slave_id);
- selection_logic(internals, slave_id);
-
- SM_FLAG_CLR(port, BEGIN);
- show_warnings(slave_id);
- }
-
- /* Re-arm; the return value of rte_eal_alarm_set() is not checked. */
- rte_eal_alarm_set(internals->mode4.update_timeout_us,
- bond_mode_8023ad_periodic_cb, arg);
-}
-
-/*
- * Reset the mode 4 state of a slave port and, on first activation, create
- * its mbuf pool and RX/TX rings.
- *
- * The slave must not yet be in the active slave list (enforced with
- * RTE_VERIFY). Failure to create the pool or a ring ends in rte_panic().
- */
-void
-bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
-{
- struct bond_dev_private *internals = bond_dev->data->dev_private;
-
- struct port *port = &mode_8023ad_ports[slave_id];
- /* Initial actor and partner parameters; 16-bit fields are big endian. */
- struct port_params initial = {
- .system = { { 0 } },
- .system_priority = rte_cpu_to_be_16(0xFFFF),
- .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
- .port_priority = rte_cpu_to_be_16(0x00FF),
- .port_number = 0,
- };
-
- char mem_name[RTE_ETH_NAME_MAX_LEN];
- uint8_t socket_id;
- unsigned element_size;
-
- /* Given slave must not be in active list */
- RTE_VERIFY(find_slave_by_id(internals->active_slaves,
- internals->active_slave_count, slave_id) == internals->active_slave_count);
-
- memcpy(&port->actor, &initial, sizeof(struct port_params));
- /* Standard requires that port ID must be greater than 0.
- * Add 1 to get corresponding port_number */
- port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1);
-
- memcpy(&port->partner, &initial, sizeof(struct port_params));
-
- /* default states */
- port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
- port->partner_state = STATE_LACP_ACTIVE;
- port->sm_flags = SM_FLAGS_BEGIN;
-
- /* use this port as aggregator */
- port->aggregator_port_id = slave_id;
- rte_eth_promiscuous_enable(slave_id);
-
- timer_cancel(&port->warning_timer);
-
- /* Pool and rings are created only once per port and then reused. */
- if (port->mbuf_pool != NULL)
- return;
-
- RTE_VERIFY(port->rx_ring == NULL);
- RTE_VERIFY(port->tx_ring == NULL);
- /* NOTE(review): pci_dev is dereferenced unconditionally - confirm every
-  * possible slave device has one. */
- socket_id = rte_eth_devices[slave_id].pci_dev->numa_node;
-
- element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf)
- + RTE_PKTMBUF_HEADROOM;
-
- /* How big should the memory pool be? If the driver does not
- * free packets quickly enough there will be ENOMEM in tx_machine.
- * For now give 512 * max number of queued TX packets per slave, minus one.
- * Hope it will be enough. */
- snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
- port->mbuf_pool = rte_mempool_create(mem_name,
- BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1,
- element_size,
- RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
- sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
- NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD);
-
- /* Any memory allocation failure in initialization is critical because
- * resources can't be freed, so reinitialization is impossible. */
- if (port->mbuf_pool == NULL) {
- rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
- slave_id, mem_name, rte_strerror(rte_errno));
- }
-
- snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
- port->rx_ring = rte_ring_create(mem_name,
- rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
-
- if (port->rx_ring == NULL) {
- rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
- mem_name, rte_strerror(rte_errno));
- }
-
- /* TX ring is at least one pkt longer to make room for marker packet. */
- snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
- port->tx_ring = rte_ring_create(mem_name,
- rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
-
- if (port->tx_ring == NULL) {
- rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
- mem_name, rte_strerror(rte_errno));
- }
-}
-
-/*
- * Take a slave out of mode 4 operation.
- *
- * Every active port aggregated by this slave is unselected and falls back
- * to being its own aggregator. The slave itself is unselected, loses its
- * synchronization/collecting/distributing state bits and has both of its
- * rings drained. The slave must be in the active list. Always returns 0.
- */
-int
-bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
-		uint8_t slave_id)
-{
-	struct bond_dev_private *internals = bond_dev->data->dev_private;
-	struct port *leaving = &mode_8023ad_ports[slave_id];
-	void *queued = NULL;
-	uint8_t pos;
-
-	/* Given slave must be in active list */
-	RTE_VERIFY(find_slave_by_id(internals->active_slaves,
-	internals->active_slave_count, slave_id) < internals->active_slave_count);
-
-	/* Ports that used this slave as aggregator go back to the default
-	 * (themselves) so selection logic picks a new aggregator for them. */
-	for (pos = 0; pos < internals->active_slave_count; pos++) {
-		struct port *member =
-			&mode_8023ad_ports[internals->active_slaves[pos]];
-
-		if (member->aggregator_port_id == slave_id) {
-			member->selected = UNSELECTED;
-			member->aggregator_port_id = internals->active_slaves[pos];
-		}
-	}
-
-	leaving->selected = UNSELECTED;
-	leaving->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
-			STATE_COLLECTING);
-
-	/* Release everything still queued for this slave. */
-	while (rte_ring_dequeue(leaving->rx_ring, &queued) == 0)
-		rte_pktmbuf_free((struct rte_mbuf *)queued);
-
-	while (rte_ring_dequeue(leaving->tx_ring, &queued) == 0)
-		rte_pktmbuf_free((struct rte_mbuf *)queued);
-
-	return 0;
-}
-
-/*
- * Re-read the MAC address of every active slave into its actor system id.
- *
- * The periodic callback is cancelled while the update runs and is started
- * again afterwards if the bonded device is started. When the address of an
- * aggregator changed, NTT is set on every active port using that aggregator.
- */
-void
-bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
-{
-	struct bond_dev_private *internals = bond_dev->data->dev_private;
-	struct ether_addr current_mac;
-	uint8_t outer, inner;
-
-	bond_mode_8023ad_stop(bond_dev);
-
-	for (outer = 0; outer < internals->active_slave_count; outer++) {
-		uint8_t slave_id = internals->active_slaves[outer];
-		struct port *slave = &mode_8023ad_ports[slave_id];
-
-		rte_eth_macaddr_get(slave_id, &current_mac);
-
-		if (!is_same_ether_addr(&current_mac, &slave->actor.system)) {
-			ether_addr_copy(&current_mac, &slave->actor.system);
-
-			/* Only an aggregator has ports depending on its address. */
-			if (slave->aggregator_port_id == slave_id) {
-				for (inner = 0; inner < internals->active_slave_count;
-						inner++) {
-					struct port *member =
-						&mode_8023ad_ports[internals->active_slaves[inner]];
-
-					if (member->aggregator_port_id == slave_id)
-						SM_FLAG_SET(member, NTT);
-				}
-			}
-		}
-	}
-
-	if (bond_dev->data->dev_started)
-		bond_mode_8023ad_start(bond_dev);
-}
-
-/*
- * Report the current mode 4 configuration of a bonded device.
- *
- * Timeouts are kept internally in TSC ticks (update_timeout_us in
- * microseconds) and are converted back to milliseconds here. Every field of
- * *conf is written, including rx_marker_period_ms, so the result can be
- * passed back to the setup functions.
- *
- * @param dev	Bonded device.
- * @param conf	Output configuration; must not be NULL.
- */
-void
-bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
-		struct rte_eth_bond_8023ad_conf *conf)
-{
-	struct bond_dev_private *internals = dev->data->dev_private;
-	struct mode8023ad_private *mode4 = &internals->mode4;
-	uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
-
-	conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
-	conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
-	conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
-	conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
-	conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
-	conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
-	/* Previously left unset, handing an indeterminate value to the caller. */
-	conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
-	conf->update_timeout_ms = mode4->update_timeout_us / 1000;
-}
-
-/*
- * Store a mode 4 configuration in the private data of a bonded device.
- *
- * Millisecond values are converted to TSC ticks, except the update timeout
- * which is stored in microseconds. A NULL conf selects the built-in
- * defaults. No validation is done here.
- *
- * @param dev	Bonded device.
- * @param conf	New configuration or NULL for defaults.
- */
-void
-bond_mode_8023ad_setup(struct rte_eth_dev *dev,
-		struct rte_eth_bond_8023ad_conf *conf)
-{
-	const struct rte_eth_bond_8023ad_conf defaults = {
-		.fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS,
-		.slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS,
-		.short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS,
-		.long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS,
-		.aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS,
-		.tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS,
-		.rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS,
-		.update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS,
-	};
-	const struct rte_eth_bond_8023ad_conf *cfg =
-		(conf != NULL) ? conf : &defaults;
-	struct bond_dev_private *internals = dev->data->dev_private;
-	struct mode8023ad_private *mode4 = &internals->mode4;
-	uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
-
-	mode4->fast_periodic_timeout = cfg->fast_periodic_ms * ms_ticks;
-	mode4->slow_periodic_timeout = cfg->slow_periodic_ms * ms_ticks;
-	mode4->short_timeout = cfg->short_timeout_ms * ms_ticks;
-	mode4->long_timeout = cfg->long_timeout_ms * ms_ticks;
-	mode4->aggregate_wait_timeout = cfg->aggregate_wait_timeout_ms * ms_ticks;
-	mode4->tx_period_timeout = cfg->tx_period_ms * ms_ticks;
-	mode4->rx_marker_timeout = cfg->rx_marker_period_ms * ms_ticks;
-	mode4->update_timeout_us = cfg->update_timeout_ms * 1000;
-}
-
-/*
- * Activate mode 4 on every slave in the active slave list.
- *
- * The activation routine takes a slave port id, so the id stored at each
- * list position is passed rather than the position itself.
- *
- * NOTE(review): bond_mode_8023ad_activate_slave() verifies that the slave
- * is not in the active list, so this loop can only complete when the list
- * is empty - confirm how callers use this function.
- *
- * @param bond_dev	Bonded device.
- * @return		Always 0.
- */
-int
-bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
-{
-	struct bond_dev_private *internals = bond_dev->data->dev_private;
-	uint8_t i;
-
-	for (i = 0; i < internals->active_slave_count; i++)
-		bond_mode_8023ad_activate_slave(bond_dev,
-				internals->active_slaves[i]);
-
-	return 0;
-}
-
-/*
- * Schedule the first run of the periodic mode 4 callback.
- *
- * The first delay is the compile-time default update timeout; the callback
- * re-arms itself afterwards. Returns the result of rte_eal_alarm_set().
- */
-int
-bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
-{
-	const uint64_t first_delay_us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
-
-	return rte_eal_alarm_set(first_delay_us, bond_mode_8023ad_periodic_cb,
-			bond_dev);
-}
-
-/*
- * Cancel the pending periodic mode 4 callback of the bonded device.
- * The result of the cancellation is not reported to the caller.
- */
-void
-bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
-{
-	(void)rte_eal_alarm_cancel(bond_mode_8023ad_periodic_cb, bond_dev);
-}
-
-/*
- * Dispatch a received slow protocol frame.
- *
- * Marker information frames are turned into marker responses in place and
- * queued on the slave's TX ring, rate limited by rx_marker_timer. LACP
- * frames are queued on the slave's RX ring for the periodic callback.
- * Anything else, or any frame that cannot be queued, is freed and a warning
- * flag is recorded on the port. In every case the caller gives up the mbuf.
- */
-void
-bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
- uint8_t slave_id, struct rte_mbuf *pkt)
-{
- struct mode8023ad_private *mode4 = &internals->mode4;
- struct port *port = &mode_8023ad_ports[slave_id];
- struct marker_header *m_hdr;
- uint64_t marker_timer, old_marker_timer;
- int retval;
- uint8_t wrn, subtype;
- /* If packet is a marker, we send response now by reusing given packet
- * and update only source MAC, destination MAC is multicast so don't
- * update it. Other frames will be handled later by state machines */
- subtype = rte_pktmbuf_mtod(pkt,
- struct slow_protocol_frame *)->slow_protocol.subtype;
-
- if (subtype == SLOW_SUBTYPE_MARKER) {
- m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
-
- /* Only marker information TLVs are answered. */
- if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
- wrn = WRN_UNKNOWN_MARKER_TYPE;
- goto free_out;
- }
-
- /* Setup marker timer. Do it in a loop in case of concurrent access. */
- do {
- old_marker_timer = port->rx_marker_timer;
- /* A marker arriving before the previous timer expired is dropped. */
- if (!timer_is_expired(&old_marker_timer)) {
- wrn = WRN_RX_MARKER_TO_FAST;
- goto free_out;
- }
-
- timer_set(&marker_timer, mode4->rx_marker_timeout);
- /* Retry when the timer changed since it was sampled above. */
- retval = rte_atomic64_cmpset(&port->rx_marker_timer,
- old_marker_timer, marker_timer);
- } while (unlikely(retval == 0));
-
- m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
- rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
-
- if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
- /* reset timer */
- port->rx_marker_timer = 0;
- wrn = WRN_TX_QUEUE_FULL;
- goto free_out;
- }
- } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
- if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
- /* If RX ring is full, free the LACPDU message and drop the packet */
- wrn = WRN_RX_QUEUE_FULL;
- goto free_out;
- }
- } else {
- wrn = WRN_UNKNOWN_SLOW_TYPE;
- goto free_out;
- }
-
- /* Packet was queued on a ring; it must not be freed here. */
- return;
-
-free_out:
- set_warning_flags(port, wrn);
- rte_pktmbuf_free(pkt);
-}
-
-/*
- * Public wrapper: fetch the mode 4 configuration of bonded port port_id.
- * Returns -EINVAL for an invalid bonded port id or a NULL conf, 0 otherwise.
- */
-int
-rte_eth_bond_8023ad_conf_get(uint8_t port_id,
-		struct rte_eth_bond_8023ad_conf *conf)
-{
-	if (valid_bonded_port_id(port_id) != 0 || conf == NULL)
-		return -EINVAL;
-
-	bond_mode_8023ad_conf_get(&rte_eth_devices[port_id], conf);
-	return 0;
-}
-
-/*
- * Public wrapper: validate and apply a mode 4 configuration.
- *
- * A non-NULL conf must have non-zero slow periodic, long timeout, aggregate
- * wait, tx period, rx marker period and update timeout values, with the
- * fast periodic below the slow periodic and the short timeout below the
- * long timeout. A NULL conf applies the defaults.
- *
- * Returns 0 on success, -EINVAL for an invalid port or configuration.
- */
-int
-rte_eth_bond_8023ad_setup(uint8_t port_id,
-		struct rte_eth_bond_8023ad_conf *conf)
-{
-	if (valid_bonded_port_id(port_id) != 0)
-		return -EINVAL;
-
-	if (conf != NULL) {
-		/* Basic sanity check */
-		const int periodic_ok = conf->slow_periodic_ms != 0 &&
-			conf->fast_periodic_ms < conf->slow_periodic_ms;
-		const int timeouts_ok = conf->long_timeout_ms != 0 &&
-			conf->short_timeout_ms < conf->long_timeout_ms;
-		const int periods_ok = conf->aggregate_wait_timeout_ms != 0 &&
-			conf->tx_period_ms != 0 &&
-			conf->rx_marker_period_ms != 0 &&
-			conf->update_timeout_ms != 0;
-
-		if (!(periodic_ok && timeouts_ok && periods_ok)) {
-			RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
-			return -EINVAL;
-		}
-	}
-
-	bond_mode_8023ad_setup(&rte_eth_devices[port_id], conf);
-
-	return 0;
-}
-
-/*
- * Copy the mode 4 state of an active slave into *info.
- *
- * Returns -EINVAL when info is NULL, port_id is not a valid bonded port,
- * the bonded port is not in 802.3AD mode, or slave_id is not in the active
- * slave list. Returns 0 on success.
- */
-int
-rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
-		struct rte_eth_bond_8023ad_slave_info *info)
-{
-	struct bond_dev_private *internals;
-	struct port *port;
-	uint8_t slave_pos;
-
-	if (info == NULL)
-		return -EINVAL;
-	if (valid_bonded_port_id(port_id) != 0)
-		return -EINVAL;
-	if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
-		return -EINVAL;
-
-	internals = rte_eth_devices[port_id].data->dev_private;
-
-	/* A result equal to the list length means "not found". */
-	slave_pos = find_slave_by_id(internals->active_slaves,
-			internals->active_slave_count, slave_id);
-	if (slave_pos == internals->active_slave_count)
-		return -EINVAL;
-
-	port = &mode_8023ad_ports[slave_id];
-
-	info->selected = port->selected;
-	info->agg_port_id = port->aggregator_port_id;
-
-	info->actor_state = port->actor_state;
-	rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
-
-	info->partner_state = port->partner_state;
-	rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
-
-	return 0;
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_ETH_BOND_8023AD_H_
-#define RTE_ETH_BOND_8023AD_H_
-
-#include <rte_ether.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * Actor/partner states. Single-bit flags that are OR-ed together in the
- * 8-bit actor_state and partner_state fields.
- */
-#define STATE_LACP_ACTIVE 0x01
-#define STATE_LACP_SHORT_TIMEOUT 0x02
-#define STATE_AGGREGATION 0x04
-#define STATE_SYNCHRONIZATION 0x08
-#define STATE_COLLECTING 0x10
-#define STATE_DISTRIBUTING 0x20
-/** Partners parameters are defaulted */
-#define STATE_DEFAULTED 0x40
-#define STATE_EXPIRED 0x80
-
-/** TLV type values written into the LACPDU sections. */
-#define TLV_TYPE_ACTOR_INFORMATION 0x01
-#define TLV_TYPE_PARTNER_INFORMATION 0x02
-#define TLV_TYPE_COLLECTOR_INFORMATION 0x03
-#define TLV_TYPE_TERMINATOR_INFORMATION 0x00
-
-/** Slow protocol subtypes: first byte after the ethernet header. */
-#define SLOW_SUBTYPE_LACP 0x01
-#define SLOW_SUBTYPE_MARKER 0x02
-
-/** Marker TLV types: information (request) and response. */
-#define MARKER_TLV_TYPE_INFO 0x01
-#define MARKER_TLV_TYPE_RESP 0x02
-
-/** Selection status of a port with respect to its aggregator. */
-enum rte_bond_8023ad_selection {
- UNSELECTED,
- STANDBY,
- SELECTED
-};
-
-/**
- * Generic slow protocol structure: the subtype byte followed by an opaque
- * payload. Used to read the subtype before the frame is interpreted as a
- * LACPDU or a marker.
- */
-struct slow_protocol {
- uint8_t subtype;
- uint8_t reserved_119[119];
-} __attribute__((__packed__));
-
-/** Generic slow protocol frame: ethernet header plus slow protocol body. */
-struct slow_protocol_frame {
- struct ether_hdr eth_hdr;
- struct slow_protocol slow_protocol;
-} __attribute__((__packed__));
-
-/**
- * Actor or partner port parameters as carried inside a LACPDU. The driver
- * stores the 16-bit fields in big endian byte order.
- */
-struct port_params {
- uint16_t system_priority;
- /**< System priority (unused in current implementation) */
- struct ether_addr system;
- /**< System ID - Slave MAC address, same as bonding MAC address */
- uint16_t key;
- /**< Speed information (implementation dependent) and duplex. */
- uint16_t port_priority;
- /**< Priority of this (unused in current implementation) */
- uint16_t port_number;
- /**< Port number. For the actor it is the slave port id plus one. */
-} __attribute__((__packed__));
-
-/**
- * Actor or partner information TLV of a LACPDU: TLV header, port
- * parameters, state bitmask (STATE_* flags) and reserved padding.
- */
-struct lacpdu_actor_partner_params {
- uint8_t tlv_type_info;
- uint8_t info_length;
- struct port_params port_params;
- uint8_t state;
- uint8_t reserved_3[3];
-} __attribute__((__packed__));
-
-/**
- * LACPDU structure (5.4.2 in 802.1AX documentation): subtype and version,
- * actor TLV, partner TLV, collector TLV and terminator TLV.
- */
-struct lacpdu {
- uint8_t subtype;
- uint8_t version_number;
-
- struct lacpdu_actor_partner_params actor;
- struct lacpdu_actor_partner_params partner;
-
- /* Collector information TLV */
- uint8_t tlv_type_collector_info;
- uint8_t collector_info_length;
- uint16_t collector_max_delay;
- uint8_t reserved_12[12];
-
- /* Terminator TLV */
- uint8_t tlv_type_terminator;
- uint8_t terminator_length;
- uint8_t reserved_50[50];
-} __attribute__((__packed__));
-
-/** LACPDU frame: ethernet header followed by the LACPDU. */
-struct lacpdu_header {
- struct ether_hdr eth_hdr;
- struct lacpdu lacpdu;
-} __attribute__((__packed__));
-
-/**
- * Marker PDU: subtype and version, marker TLV (information or response,
- * see MARKER_TLV_TYPE_*) and terminator TLV.
- */
-struct marker {
- uint8_t subtype;
- uint8_t version_number;
-
- /* Marker TLV */
- uint8_t tlv_type_marker;
- uint8_t info_length;
- uint16_t requester_port;
- struct ether_addr requester_system;
- uint32_t requester_transaction_id;
- uint8_t reserved_2[2];
-
- /* Terminator TLV */
- uint8_t tlv_type_terminator;
- uint8_t terminator_length;
- uint8_t reserved_90[90];
-} __attribute__((__packed__));
-
-/** Marker frame: ethernet header followed by the marker PDU. */
-struct marker_header {
- struct ether_hdr eth_hdr;
- struct marker marker;
-} __attribute__((__packed__));
-
-/**
- * Mode 4 timing configuration. All values are in milliseconds.
- */
-struct rte_eth_bond_8023ad_conf {
- uint32_t fast_periodic_ms;
- uint32_t slow_periodic_ms;
- uint32_t short_timeout_ms;
- uint32_t long_timeout_ms;
- uint32_t aggregate_wait_timeout_ms;
- uint32_t tx_period_ms;
- /**< Period of the TX machine timer. */
- uint32_t rx_marker_period_ms;
- /**< Minimum interval between two answered marker frames on a port. */
- uint32_t update_timeout_ms;
- /**< Interval between runs of the periodic state machine callback. */
-};
-
-/**
- * Snapshot of the mode 4 state of one slave, as filled in by
- * rte_eth_bond_8023ad_slave_info().
- */
-struct rte_eth_bond_8023ad_slave_info {
- enum rte_bond_8023ad_selection selected;
- uint8_t actor_state;
- /**< Bitmask of STATE_* flags. */
- struct port_params actor;
- uint8_t partner_state;
- /**< Bitmask of STATE_* flags. */
- struct port_params partner;
- uint8_t agg_port_id;
- /**< Port id of the aggregator used by this slave. */
-};
-
-/**
- * @internal
- *
- * Function returns current configuration of 802.3AX mode.
- *
- * @param port_id Bonding device id
- * @param conf Pointer to the configuration structure to fill in.
- *
- * @return
- * 0 - if ok
- * -EINVAL if port_id is not a valid bonded device or conf is NULL
- */
-int
-rte_eth_bond_8023ad_conf_get(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-
-/**
- * @internal
- *
- * Function sets new configuration of 802.3AX mode.
- *
- * @param port_id Bonding device id
- * @param conf Configuration, if NULL set default configuration.
- * @return
- * 0 - if ok
- * -EINVAL if port_id is not a valid bonded device or the configuration
- * is invalid.
- */
-int
-rte_eth_bond_8023ad_setup(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-
-/**
- * @internal
- *
- * Function returns current state of given slave device.
- *
- * @param port_id Bonding device id
- * @param slave_id Port id of valid slave.
- * @param conf buffer for the slave state
- * @return
- * 0 - if ok
- * -EINVAL if conf is NULL, port_id is not a bonded device in 802.3AD mode,
- * or slave id is invalid (not an active slave of the given bonded device).
- */
-int
-rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
- struct rte_eth_bond_8023ad_slave_info *conf);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* RTE_ETH_BOND_8023AD_H_ */
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_ETH_BOND_8023AD_PRIVATE_H_
-#define RTE_ETH_BOND_8023AD_PRIVATE_H_
-
-#include <stdint.h>
-
-#include <rte_ether.h>
-#include <rte_byteorder.h>
-#include <rte_atomic.h>
-
-#include "rte_eth_bond_8023ad.h"
-
-/** Default interval, in ms, between runs of the periodic callback. */
-#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100
-/** Maximum number of LACP packets to one slave queued in RX ring. */
-#define BOND_MODE_8023AX_SLAVE_RX_PKTS 3
-/** Maximum number of LACP packets from one slave queued in TX ring. */
-#define BOND_MODE_8023AX_SLAVE_TX_PKTS 1
-/**
- * Timeouts definitions (5.4.4 in 802.1AX documentation), in milliseconds.
- */
-#define BOND_8023AD_FAST_PERIODIC_MS 900
-#define BOND_8023AD_SLOW_PERIODIC_MS 29000
-#define BOND_8023AD_SHORT_TIMEOUT_MS 3000
-#define BOND_8023AD_LONG_TIMEOUT_MS 90000
-#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000
-#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000
-#define BOND_8023AD_TX_MACHINE_PERIOD_MS 500
-#define BOND_8023AD_RX_MARKER_PERIOD_MS 2000
-
-/**
- * Interval of showing warning message from state machines. All messages will
- * be held (and gathered together) to prevent flooding.
- * This is not part of the 802.1AX standard.
- */
-#define BOND_8023AD_WARNINGS_PERIOD_MS 1000
-
-
-
-/**
- * State machine flags. Bit flags kept in the 16-bit sm_flags field of
- * struct port; accessed through the SM_FLAG* macros.
- */
-#define SM_FLAGS_BEGIN 0x0001
-#define SM_FLAGS_LACP_ENABLED 0x0002
-#define SM_FLAGS_ACTOR_CHURN 0x0004
-#define SM_FLAGS_PARTNER_CHURN 0x0008
-#define SM_FLAGS_MOVED 0x0100
-#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200
-#define SM_FLAGS_NTT 0x0400
-
-/**
- * Components of the port key. The full duplex flag occupies bit 0; the
- * speed value is shifted left by one before being combined with it.
- */
-#define BOND_LINK_FULL_DUPLEX_KEY 0x01
-#define BOND_LINK_SPEED_KEY_10M 0x02
-#define BOND_LINK_SPEED_KEY_100M 0x04
-#define BOND_LINK_SPEED_KEY_1000M 0x08
-#define BOND_LINK_SPEED_KEY_10G 0x10
-#define BOND_LINK_SPEED_KEY_20G 0x11
-#define BOND_LINK_SPEED_KEY_40G 0x12
-
-/** Warning bit flags recorded per port with set_warning_flags(). */
-#define WRN_RX_MARKER_TO_FAST 0x01
-#define WRN_UNKNOWN_SLOW_TYPE 0x02
-#define WRN_UNKNOWN_MARKER_TYPE 0x04
-#define WRN_NOT_LACP_CAPABLE 0x08
-#define WRN_RX_QUEUE_FULL 0x10
-#define WRN_TX_QUEUE_FULL 0x20
-
-/* Generic bit helpers: test, set and clear the bits _f in _variable. */
-#define CHECK_FLAGS(_variable, _f) ((_variable) & (_f))
-#define SET_FLAGS(_variable, _f) ((_variable) |= (_f))
-#define CLEAR_FLAGS(_variable, _f) ((_variable) &= ~(_f))
-
-/* State machine flags of port _p; _f is the flag name without SM_FLAGS_.
- * SM_FLAG() evaluates to 0 or 1. */
-#define SM_FLAG(_p, _f) (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f))
-#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
-#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)
-
-/* Actor state bits of port _p; _f is the state name without STATE_. */
-#define ACTOR_STATE(_p, _f) (!!CHECK_FLAGS((_p)->actor_state, STATE_ ## _f))
-#define ACTOR_STATE_SET(_p, _f) SET_FLAGS((_p)->actor_state, STATE_ ## _f)
-#define ACTOR_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->actor_state, STATE_ ## _f)
-
-/* Partner state bits of port _p; _f is the state name without STATE_. */
-#define PARTNER_STATE(_p, _f) (!!CHECK_FLAGS((_p)->partner_state, STATE_ ## _f))
-#define PARTNER_STATE_SET(_p, _f) SET_FLAGS((_p)->partner_state, STATE_ ## _f)
-#define PARTNER_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->partner_state, STATE_ ## _f)
-
-/** Variables associated with each port (5.4.7 in 802.1AX documentation). */
-struct port {
- /**
- * The operational values of the Actor's state parameters. Bitmask
- * of port states.
- */
- uint8_t actor_state;
-
- /** The operational Actor's port parameters */
- struct port_params actor;
-
- /**
- * The operational value of the Actor's view of the current values of
- * the Partner's state parameters. The Actor sets this variable either
- * to the value received from the Partner in an LACPDU, or to the value
- * of Partner_Admin_Port_State. Bitmask of port states.
- */
- uint8_t partner_state;
-
- /** The operational Partner's port parameters */
- struct port_params partner;
-
- /* Additional port parameters not listed in documentation */
- /** State machine flags */
- uint16_t sm_flags;
- enum rte_bond_8023ad_selection selected;
-
- uint64_t current_while_timer;
- uint64_t periodic_timer;
- uint64_t wait_while_timer;
- uint64_t tx_machine_timer;
- uint64_t tx_marker_timer;
- /* Aggregator parameters */
- /** Used aggregator port ID. Equal to the port's own id when the port
- * is its own aggregator. */
- uint16_t aggregator_port_id;
-
- /** Packet mbuf pool created when the slave is first activated */
- struct rte_mempool *mbuf_pool;
-
- /** Ring of LACP packets from RX burst function */
- struct rte_ring *rx_ring;
-
- /** Ring of slow protocol packets (LACP and MARKERS) to TX burst function */
- struct rte_ring *tx_ring;
-
- /** Timer which is also used as mutex. If is 0 (not running) RX marker
- * packet might be responded. Otherwise shall be dropped. It is zeroed in
- * mode 4 callback function after expire. */
- volatile uint64_t rx_marker_timer;
-
- /** Timer limiting how often gathered warnings are shown */
- uint64_t warning_timer;
- /** Bitmask of WRN_* flags waiting to be shown */
- volatile uint16_t warnings_to_show;
-};
-
-/**
- * Mode 4 configuration of a bonded device as stored internally. All values
- * are in TSC ticks except update_timeout_us, which is in microseconds.
- */
-struct mode8023ad_private {
- uint64_t fast_periodic_timeout;
- uint64_t slow_periodic_timeout;
- uint64_t short_timeout;
- uint64_t long_timeout;
- uint64_t aggregate_wait_timeout;
- uint64_t tx_period_timeout;
- uint64_t rx_marker_timeout;
- uint64_t update_timeout_us;
-};
-
-/**
- * @internal
- * The pool of *port* structures, indexed by slave port id. The size of the
- * pool is configured at compile-time in the <rte_eth_bond_8023ad.c> file.
- */
-extern struct port mode_8023ad_ports[];
-
-/* Forward declaration; only pointers to this type are used in this header */
-struct bond_dev_private;
-
-/**
- * @internal
- *
- * Get mode 4 configuration of bonded interface, converted to milliseconds.
- *
- * @param dev Bonded interface
- * @param conf returned configuration
- */
-void
-bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
- struct rte_eth_bond_8023ad_conf *conf);
-
-/**
- * @internal
- *
- * Set mode 4 configuration of bonded interface. The configuration is not
- * validated by this function.
- *
- * @pre Bonded interface must be stopped.
- *
- * @param dev Bonded interface
- * @param conf new configuration. If NULL set default configuration.
- */
-void
-bond_mode_8023ad_setup(struct rte_eth_dev *dev,
- struct rte_eth_bond_8023ad_conf *conf);
-
-/**
- * @internal
- *
- * Enables 802.1AX mode and all active slaves on bonded interface.
- *
- * @param dev Bonded interface
- * @return
- * 0 on success, negative value otherwise. The current implementation
- * always returns 0.
- */
-int
-bond_mode_8023ad_enable(struct rte_eth_dev *dev);
-
-/**
- * @internal
- *
- * Disables 802.1AX mode of the bonded interface and slaves.
- *
- * @param dev Bonded interface
- * @return
- * 0 on success, negative value otherwise.
- */
-/* NOTE(review): no definition is visible in this part of the file - confirm
- * that one exists. */
-int bond_mode_8023ad_disable(struct rte_eth_dev *dev);
-
-/**
- * @internal
- *
- * Starts 802.3AX state machines management logic by scheduling the
- * periodic callback.
- * @param dev Bonded interface
- * @return
- * The result of rte_eal_alarm_set(): 0 if the callback was scheduled,
- * negative value otherwise.
- */
-int
-bond_mode_8023ad_start(struct rte_eth_dev *dev);
-
-/**
- * @internal
- *
- * Stops 802.3AX state machines management logic by cancelling the periodic
- * callback. Nothing is returned, so the caller cannot tell whether a
- * callback was actually pending.
- * @param dev Bonded interface
- */
-void
-bond_mode_8023ad_stop(struct rte_eth_dev *dev);
-
-/**
- * @internal
- *
- * Passes given slow packet to state machines management logic. The packet
- * is either queued or freed; the caller must not use it afterwards.
- * @param internals Bonded device private data.
- * @param slave_id Slave port id.
- * @param pkt Slow packet.
- */
-void
-bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
- uint8_t slave_id, struct rte_mbuf *pkt);
-
-/**
- * @internal
- *
- * Initializes the 802.1AX state of the given slave. The slave must not be
- * in the active slave list yet. Resource allocation failures are fatal.
- *
- * @param dev Bonded interface.
- * @param port_id Slave port ID to be added
- */
-void
-bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id);
-
-/**
- * @internal
- *
- * Deinitializes and removes given slave from 802.1AX mode.
- *
- * @param dev Bonded interface.
- * @param slave_pos Port id of the slave. Despite its name the implementation
- * uses it as a port id, not as a position in the active_slaves array. The
- * slave must be in the active slave list.
- *
- * @return
- * 0 on success, negative value otherwise. The current implementation
- * always returns 0.
- */
-int
-bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
-
-/**
- * Updates state when MAC was changed on bonded device or one of its slaves.
- * @param bond_dev Bonded device
- */
-void
-bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev);
-
-#endif /* RTE_ETH_BOND_8023AD_H_ */
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rte_eth_bond_private.h"
-#include "rte_eth_bond_alb.h"
-
-/*
- * Fold a byte buffer into a single 8-bit value by XOR-ing all of its bytes.
- * A non-positive hash_size yields 0.
- */
-static inline uint8_t
-simple_hash(uint8_t *hash_start, int hash_size)
-{
-	const uint8_t *cursor = hash_start;
-	int remaining;
-	uint8_t folded = 0;
-
-	for (remaining = hash_size; remaining > 0; remaining--)
-		folded ^= *cursor++;
-
-	return folded;
-}
-
-/*
- * Round-robin slave selection: advance mode6.last_slave to the next position
- * in the active slave list (wrapping at active_slave_count), remember it and
- * return the port id stored at that position.
- */
-static uint8_t
-calculate_slave(struct bond_dev_private *internals)
-{
-	uint8_t next_pos =
-		(internals->mode6.last_slave + 1) % internals->active_slave_count;
-
-	internals->mode6.last_slave = next_pos;
-
-	return internals->active_slaves[next_pos];
-}
-
-/*
- * Reset the ALB (mode 6) state of a bonded device: clear the client table,
- * re-initialise the lock and round-robin index, and create the mempool used
- * for ARP update packets if it does not exist yet.
- *
- * Always returns 0; a mempool allocation failure ends in rte_panic().
- */
-int
-bond_mode_alb_enable(struct rte_eth_dev *bond_dev)
-{
-	struct bond_dev_private *internals = bond_dev->data->dev_private;
-	int socket_id = bond_dev->pci_dev->numa_node;
-	char mem_name[RTE_ETH_NAME_MAX_LEN];
-	uint16_t data_size;
-
-	/* Start from an empty client table and a clean round-robin state */
-	memset(internals->mode6.client_table, 0,
-			sizeof(struct client_data) * ALB_HASH_TABLE_SIZE);
-	rte_spinlock_init(&internals->mode6.lock);
-	internals->mode6.last_slave = ALB_NULL_INDEX;
-	internals->mode6.ntt = 0;
-
-	/* The ARP mempool is only created when none is attached yet */
-	if (internals->mode6.mempool != NULL)
-		return 0;
-
-	/*
-	 * 256 is size of ETH header, ARP header and nested VLAN headers.
-	 * The value is chosen to be cache aligned.
-	 */
-	data_size = 256 + RTE_PKTMBUF_HEADROOM;
-	snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name);
-	internals->mode6.mempool = rte_pktmbuf_pool_create(mem_name,
-			512 * RTE_MAX_ETHPORTS,
-			RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
-				32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
-			0, data_size, socket_id);
-
-	if (internals->mode6.mempool != NULL)
-		return 0;
-
-	RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n",
-			bond_dev->data->name);
-	rte_panic(
-			"Failed to allocate memory pool ('%s')\n"
-			"for bond device '%s'\n",
-			mem_name, bond_dev->data->name);
-
-	return 0;
-}
-
-/*
- * Handle a received ARP packet in ALB mode.
- *
- * eth_h points at the Ethernet header, offset is the number of bytes between
- * the end of that header and the ARP header (the VLAN headers). Anything that
- * is not an ARP Reply is left untouched. For a Reply, the client table entry
- * selected by hashing the sender IP is refreshed when it is unused or differs
- * from the packet, and mode6.ntt is raised.
- */
-void bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
- struct bond_dev_private *internals) {
- struct arp_hdr *arp;
-
- struct client_data *hash_table = internals->mode6.client_table;
- struct client_data *client_info;
-
- uint8_t hash_index;
-
- /* ARP header sits right after the Ethernet header plus 'offset' bytes */
- arp = (struct arp_hdr *) ((char *) (eth_h + 1) + offset);
-
- /* ARP Requests are forwarded to the application with no changes */
- if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY))
- return;
-
- /* From now on, we analyze only ARP Reply packets */
- hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip,
- sizeof(arp->arp_data.arp_sip));
- client_info = &hash_table[hash_index];
-
- /*
- * We got reply for ARP Request send by the application. We need to
- * update client table when received data differ from what is stored
- * in ALB table and issue sending update packet to that slave.
- */
- rte_spinlock_lock(&internals->mode6.lock);
- if (client_info->in_use == 0 ||
- client_info->app_ip != arp->arp_data.arp_tip ||
- client_info->cli_ip != arp->arp_data.arp_sip ||
- !is_same_ether_addr(&client_info->cli_mac, &arp->arp_data.arp_sha) ||
- client_info->vlan_count != offset / sizeof(struct vlan_hdr) ||
- memcmp(client_info->vlan, eth_h + 1, offset) != 0
- ) {
- client_info->in_use = 1;
- client_info->app_ip = arp->arp_data.arp_tip;
- client_info->cli_ip = arp->arp_data.arp_sip;
- ether_addr_copy(&arp->arp_data.arp_sha, &client_info->cli_mac);
- /* Pick the next slave round-robin and use its MAC for this client */
- client_info->slave_idx = calculate_slave(internals);
- rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
- /* The received packet's target hardware address is rewritten too */
- ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_tha);
- /* NOTE(review): assumes offset <= sizeof(client_info->vlan) - confirm in caller */
- memcpy(client_info->vlan, eth_h + 1, offset);
- client_info->vlan_count = offset / sizeof(struct vlan_hdr);
- }
- /* Raised for every ARP Reply, whether or not the entry changed */
- internals->mode6.ntt = 1;
- rte_spinlock_unlock(&internals->mode6.lock);
-}
-
-/*
- * Handle an ARP packet about to be transmitted in ALB mode and choose the
- * slave port to send it on.
- *
- * eth_h points at the Ethernet header, offset is the number of bytes between
- * the end of that header and the ARP header. The ARP sender hardware address
- * is rewritten to the MAC of the chosen port. Returns that port id.
- */
-uint8_t
-bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
- struct bond_dev_private *internals)
-{
- struct arp_hdr *arp;
-
- struct client_data *hash_table = internals->mode6.client_table;
- struct client_data *client_info;
-
- uint8_t hash_index;
-
- struct ether_addr bonding_mac;
-
- arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
-
- /*
- * Traffic with src MAC other than bonding should be sent on
- * current primary port.
- */
- rte_eth_macaddr_get(internals->port_id, &bonding_mac);
- if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) {
- rte_eth_macaddr_get(internals->current_primary_port,
- &arp->arp_data.arp_sha);
- return internals->current_primary_port;
- }
-
- /* Client table is indexed by a hash of the target (client) IP */
- hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip,
- sizeof(uint32_t));
- client_info = &hash_table[hash_index];
-
- rte_spinlock_lock(&internals->mode6.lock);
- if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) {
- if (client_info->in_use) {
- if (client_info->app_ip == arp->arp_data.arp_sip &&
- client_info->cli_ip == arp->arp_data.arp_tip) {
- /* Entry is already assigned to this client */
- if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) {
- ether_addr_copy(&arp->arp_data.arp_tha,
- &client_info->cli_mac);
- }
- rte_eth_macaddr_get(client_info->slave_idx,
- &client_info->app_mac);
- ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
- memcpy(client_info->vlan, eth_h + 1, offset);
- client_info->vlan_count = offset / sizeof(struct vlan_hdr);
- rte_spinlock_unlock(&internals->mode6.lock);
- /* NOTE(review): slave_idx is read after the lock is released */
- return client_info->slave_idx;
- }
- }
-
- /* Assign new slave to this client and update src mac in ARP */
- client_info->in_use = 1;
- client_info->ntt = 0;
- client_info->app_ip = arp->arp_data.arp_sip;
- ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac);
- client_info->cli_ip = arp->arp_data.arp_tip;
- client_info->slave_idx = calculate_slave(internals);
- rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
- ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
- memcpy(client_info->vlan, eth_h + 1, offset);
- client_info->vlan_count = offset / sizeof(struct vlan_hdr);
- rte_spinlock_unlock(&internals->mode6.lock);
- /* NOTE(review): slave_idx is read after the lock is released */
- return client_info->slave_idx;
- }
-
- /* If packet is not ARP Reply, send it on current primary port. */
- rte_spinlock_unlock(&internals->mode6.lock);
- rte_eth_macaddr_get(internals->current_primary_port,
- &arp->arp_data.arp_sha);
- return internals->current_primary_port;
-}
-
-/*
- * Fill 'pkt' with an ARP Reply built from 'client_info': Ethernet header,
- * the stored VLAN headers (if any) and the ARP header. The whole packet is
- * written while holding mode6.lock.
- *
- * Returns the slave_idx recorded in client_info, i.e. the port on which the
- * packet should be sent.
- */
-uint8_t
-bond_mode_alb_arp_upd(struct client_data *client_info,
-		struct rte_mbuf *pkt, struct bond_dev_private *internals)
-{
-	struct ether_hdr *eth_h;
-	struct arp_hdr *arp_h;
-	uint8_t slave_idx;
-
-	rte_spinlock_lock(&internals->mode6.lock);
-	eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
-
-	/* Ethernet header: from the application MAC to the client MAC */
-	ether_addr_copy(&client_info->app_mac, &eth_h->s_addr);
-	ether_addr_copy(&client_info->cli_mac, &eth_h->d_addr);
-	if (client_info->vlan_count > 0)
-		eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
-	else
-		eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP);
-
-	/* ARP header follows the Ethernet header and the stored VLAN headers */
-	arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr)
-			+ client_info->vlan_count * sizeof(struct vlan_hdr));
-
-	memcpy(eth_h + 1, client_info->vlan,
-			client_info->vlan_count * sizeof(struct vlan_hdr));
-
-	ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha);
-	arp_h->arp_data.arp_sip = client_info->app_ip;
-	ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha);
-	arp_h->arp_data.arp_tip = client_info->cli_ip;
-
-	arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER);
-	arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-	arp_h->arp_hln = ETHER_ADDR_LEN;
-	arp_h->arp_pln = sizeof(uint32_t);
-	arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
-
-	/* Read under the lock so the returned index matches the packet */
-	slave_idx = client_info->slave_idx;
-	rte_spinlock_unlock(&internals->mode6.lock);
-
-	return slave_idx;
-}
-
-/*
- * Re-assign a slave to every client table entry that is in use, restarting
- * the round-robin from the beginning, and raise mode6.ntt if any entry was
- * touched. Does nothing when there is no active slave.
- */
-void
-bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev)
-{
-	struct bond_dev_private *internals = bond_dev->data->dev_private;
-	struct client_data *entry;
-	int slot;
-
-	/* If active slave count is 0, it's pointless to refresh alb table */
-	if (internals->active_slave_count <= 0)
-		return;
-
-	rte_spinlock_lock(&internals->mode6.lock);
-
-	/* Restart the round-robin before redistributing the clients */
-	internals->mode6.last_slave = ALB_NULL_INDEX;
-
-	for (slot = 0; slot < ALB_HASH_TABLE_SIZE; slot++) {
-		entry = &internals->mode6.client_table[slot];
-		if (!entry->in_use)
-			continue;
-
-		entry->slave_idx = calculate_slave(internals);
-		rte_eth_macaddr_get(entry->slave_idx, &entry->app_mac);
-		internals->mode6.ntt = 1;
-	}
-
-	rte_spinlock_unlock(&internals->mode6.lock);
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_ETH_BOND_ALB_H_
-#define RTE_ETH_BOND_ALB_H_
-
-#include <rte_ether.h>
-#include <rte_arp.h>
-
-#define ALB_HASH_TABLE_SIZE 256
-#define ALB_NULL_INDEX 0xFFFFFFFF
-
-/** ARP data of single client (one entry of the ALB client table) */
-struct client_data {
- struct ether_addr app_mac;
- /**< MAC address of application running DPDK */
- uint32_t app_ip;
- /**< IP address of application running DPDK */
- struct ether_addr cli_mac;
- /**< Client MAC address */
- uint32_t cli_ip;
- /**< Client IP address */
-
- uint8_t slave_idx;
- /**< Index of slave on which we connect with that client */
- uint8_t in_use;
- /**< Flag indicating if entry in client table is currently used */
- uint8_t ntt;
- /**< Flag indicating if we need to send update to this client on next tx */
-
- struct vlan_hdr vlan[2];
- /**< Content of vlan headers (room for two nested headers) */
- uint8_t vlan_count;
- /**< Number of nested vlan headers */
-};
-
-/** Per bonded device state of ALB mode (mode 6) */
-struct mode_alb_private {
- struct client_data client_table[ALB_HASH_TABLE_SIZE];
- /**< Hash table storing ARP data of every client connected */
- struct rte_mempool *mempool;
- /**< Mempool for creating ARP update packets */
- uint8_t ntt;
- /**< Flag indicating if we need to send update to any client on next tx */
- uint32_t last_slave;
- /**< Index of last used slave in client table */
- rte_spinlock_t lock;
- /**< Spinlock taken while the client table, ntt or last_slave is updated */
-};
-
-/**
- * ALB mode initialization.
- *
- * @param bond_dev Pointer to bonding device.
- *
- * @return
- * Error code - 0 on success.
- */
-int
-bond_mode_alb_enable(struct rte_eth_dev *bond_dev);
-
-/**
- * Function handles ARP packet reception. If an ARP request is received, it is
- * forwarded to the application without changes. If it is an ARP reply, the
- * client table is updated.
- *
- * @param eth_h ETH header of received packet.
- * @param offset Vlan header offset.
- * @param internals Bonding data.
- */
-void
-bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
- struct bond_dev_private *internals);
-
-/**
- * Function handles ARP packet transmission. It also decides on which slave
- * to send that packet. If the packet is an ARP Request, it is sent on the
- * primary slave. If it is an ARP Reply, it is sent on the slave stored in the
- * client table for that connection. On Reply the function also updates the
- * data in the client table.
- *
- * @param eth_h ETH header of transmitted packet.
- * @param offset Vlan header offset.
- * @param internals Bonding data.
- *
- * @return
- * Index of slave on which packet should be sent.
- */
-uint8_t
-bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
- struct bond_dev_private *internals);
-
-/**
- * Function fills packet with ARP data from client_info.
- *
- * @param client_info Data of client to which packet is sent.
- * @param pkt Pointer to packet which is sent.
- * @param internals Bonding data.
- *
- * @return
- * Index of slave on which packet should be sent.
- */
-uint8_t
-bond_mode_alb_arp_upd(struct client_data *client_info,
- struct rte_mbuf *pkt, struct bond_dev_private *internals);
-
-/**
- * Function updates slave indexes of active connections.
- *
- * @param bond_dev Pointer to bonded device struct.
- */
-void
-bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev);
-
-#endif /* RTE_ETH_BOND_ALB_H_ */
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-
-#include <rte_mbuf.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_tcp.h>
-
-#include "rte_eth_bond.h"
-#include "rte_eth_bond_private.h"
-#include "rte_eth_bond_8023ad_private.h"
-
-#define DEFAULT_POLLING_INTERVAL_10_MS (10)
-
-/*
- * Check whether eth_dev is driven by the link bonding PMD by comparing its
- * PCI driver name with driver_name.
- *
- * Returns 0 when the names match, non-zero otherwise (-1 if either name is
- * missing or the lengths differ).
- */
-int
-valid_bonded_ethdev(struct rte_eth_dev *eth_dev)
-{
-	const char *dev_drv_name = eth_dev->driver->pci_drv.name;
-	size_t expected_len;
-
-	/* Both names must be present before they can be compared */
-	if (dev_drv_name == NULL || driver_name == NULL)
-		return -1;
-
-	/* Names of different length can never match */
-	expected_len = strlen(driver_name);
-	if (strlen(dev_drv_name) != expected_len)
-		return -1;
-
-	return strncmp(dev_drv_name, driver_name, expected_len);
-}
-
-/*
- * Check that port_id is below the number of ethdev ports reported by
- * rte_eth_dev_count(). Returns 0 if so, otherwise logs an error and
- * returns -1.
- */
-int
-valid_port_id(uint8_t port_id)
-{
-	int nb_ports = rte_eth_dev_count();
-
-	if (port_id < nb_ports)
-		return 0;
-
-	RTE_BOND_LOG(ERR, "Port Id %d is greater than rte_eth_dev_count %d",
-			port_id, nb_ports);
-	return -1;
-}
-
-/*
- * Check that port_id is a valid port AND refers to a bonded device.
- * Returns 0 if so, -1 otherwise.
- */
-int
-valid_bonded_port_id(uint8_t port_id)
-{
-	/* The port must exist at all */
-	if (valid_port_id(port_id) != 0)
-		return -1;
-
-	/* ... and must be driven by the bonding PMD */
-	if (valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0)
-		return 0;
-
-	RTE_BOND_LOG(ERR, "Specified port Id %d is not a bonded eth_dev device",
-			port_id);
-	return -1;
-}
-
-/*
- * Check that port_id is a valid port that is NOT itself a bonded device,
- * i.e. one that may be used as a slave. Returns 0 if so, -1 otherwise.
- */
-int
-valid_slave_port_id(uint8_t port_id)
-{
-	/* The port must exist at all */
-	if (valid_port_id(port_id) != 0)
-		return -1;
-
-	/* A bonded device (check returns 0) cannot be a slave */
-	return valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0 ? -1 : 0;
-}
-
-/*
- * Append slave port_id to the active slave list of the bonded device and run
- * the mode specific activation hooks (802.3ad, TLB, ALB).
- */
-void
-activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
-{
-	struct bond_dev_private *internals = eth_dev->data->dev_private;
-	uint8_t tlb_slot = internals->active_slave_count;
-
-	if (internals->mode == BONDING_MODE_8023AD)
-		bond_mode_8023ad_activate_slave(eth_dev, port_id);
-
-	/* TLB and ALB also record the slave in the TLB ordering table */
-	if (internals->mode == BONDING_MODE_ALB
-			|| internals->mode == BONDING_MODE_TLB)
-		internals->tlb_slaves_order[tlb_slot] = port_id;
-
-	RTE_VERIFY(internals->active_slave_count <
-			(RTE_DIM(internals->active_slaves) - 1));
-
-	internals->active_slaves[internals->active_slave_count++] = port_id;
-
-	/* Mode specific work that needs the updated active list */
-	if (internals->mode == BONDING_MODE_TLB)
-		bond_tlb_activate_slave(internals);
-	if (internals->mode == BONDING_MODE_ALB)
-		bond_mode_alb_client_list_upd(eth_dev);
-}
-
-/*
- * Remove slave port_id from the active slave list of the bonded device.
- *
- * Mode specific machinery (802.3ad state machines, TLB) is stopped before the
- * list is modified and, if the bonded device is started, restarted afterwards
- * with the updated list.
- */
-void
-deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
-{
- uint8_t slave_pos;
- struct bond_dev_private *internals = eth_dev->data->dev_private;
- uint8_t active_count = internals->active_slave_count;
-
- if (internals->mode == BONDING_MODE_8023AD) {
- bond_mode_8023ad_stop(eth_dev);
- /* NOTE(review): the port id is passed here, while the prototype names
-  * this argument slave_pos - confirm which one the callee expects */
- bond_mode_8023ad_deactivate_slave(eth_dev, port_id);
- } else if (internals->mode == BONDING_MODE_TLB
- || internals->mode == BONDING_MODE_ALB)
- bond_tlb_disable(internals);
-
- slave_pos = find_slave_by_id(internals->active_slaves, active_count,
- port_id);
-
- /* If the slave was found in the active list (position below the count),
- * drop it by shifting the following entries down by one */
- if (slave_pos < active_count) {
- active_count--;
- memmove(internals->active_slaves + slave_pos,
- internals->active_slaves + slave_pos + 1,
- (active_count - slave_pos) *
- sizeof(internals->active_slaves[0]));
- }
-
- RTE_VERIFY(active_count < RTE_DIM(internals->active_slaves));
- internals->active_slave_count = active_count;
-
- /* Restart what was stopped above, now using the shortened list */
- if (eth_dev->data->dev_started) {
- if (internals->mode == BONDING_MODE_8023AD) {
- bond_mode_8023ad_start(eth_dev);
- } else if (internals->mode == BONDING_MODE_TLB) {
- bond_tlb_enable(internals);
- } else if (internals->mode == BONDING_MODE_ALB) {
- bond_tlb_enable(internals);
- bond_mode_alb_client_list_upd(eth_dev);
- }
- }
-}
-
-/*
- * Derive the number of sockets from the physical memory layout: scan the
- * memory segments up to the first one without an address and return the
- * highest socket_id seen plus one.
- */
-uint8_t
-number_of_sockets(void)
-{
-	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
-	int max_socket_id = 0;
-	int idx = 0;
-
-	while ((idx < RTE_MAX_MEMSEG) && (seg[idx].addr != NULL)) {
-		if (seg[idx].socket_id > max_socket_id)
-			max_socket_id = seg[idx].socket_id;
-		idx++;
-	}
-
-	/* Number of sockets = maximum socket_id + 1 */
-	return max_socket_id + 1;
-}
-
-const char *driver_name = "Link Bonding PMD";
-
-/*
- * Create a bonded ethdev called 'name' in bonding mode 'mode'. The dummy PCI
- * device/driver structures, the private data and the MAC address storage are
- * all allocated on NUMA socket 'socket_id'.
- *
- * Returns the port id of the new bonded device, or -1 on failure.
- */
-int
-rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
-{
-	struct rte_pci_device *pci_dev = NULL;
-	struct bond_dev_private *internals = NULL;
-	struct rte_eth_dev *eth_dev = NULL;
-	struct eth_driver *eth_drv = NULL;
-	struct rte_pci_driver *pci_drv = NULL;
-	struct rte_pci_id *pci_id_table = NULL;
-	/* now do all data allocation - for eth_dev structure, dummy pci driver
-	 * and internal (private) data
-	 */
-
-	if (name == NULL) {
-		RTE_BOND_LOG(ERR, "Invalid name specified");
-		goto err;
-	}
-
-	if (socket_id >= number_of_sockets()) {
-		RTE_BOND_LOG(ERR,
-				"Invalid socket id specified to create bonded device on.");
-		goto err;
-	}
-
-	pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
-	if (pci_dev == NULL) {
-		RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket");
-		goto err;
-	}
-
-	eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id);
-	if (eth_drv == NULL) {
-		RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket");
-		goto err;
-	}
-
-	/* The PCI driver is embedded in the ethdev driver structure */
-	pci_drv = &eth_drv->pci_drv;
-
-	pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id);
-	if (pci_id_table == NULL) {
-		RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket");
-		goto err;
-	}
-	pci_id_table->device_id = PCI_ANY_ID;
-	pci_id_table->subsystem_device_id = PCI_ANY_ID;
-	pci_id_table->vendor_id = PCI_ANY_ID;
-	pci_id_table->subsystem_vendor_id = PCI_ANY_ID;
-
-	pci_drv->id_table = pci_id_table;
-	pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC;
-
-	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
-	if (internals == NULL) {
-		RTE_BOND_LOG(ERR, "Unable to malloc internals on socket");
-		goto err;
-	}
-
-	/* reserve an ethdev entry */
-	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
-	if (eth_dev == NULL) {
-		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
-		goto err;
-	}
-
-	pci_dev->numa_node = socket_id;
-	pci_drv->name = driver_name;
-
-	eth_dev->driver = eth_drv;
-	eth_dev->data->dev_private = internals;
-	eth_dev->data->nb_rx_queues = (uint16_t)1;
-	eth_dev->data->nb_tx_queues = (uint16_t)1;
-
-	TAILQ_INIT(&(eth_dev->link_intr_cbs));
-
-	eth_dev->data->dev_link.link_status = 0;
-
-	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
-			socket_id);
-	/* Without MAC storage the device cannot be used: fail the creation */
-	if (eth_dev->data->mac_addrs == NULL) {
-		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
-		goto err;
-	}
-
-	eth_dev->data->dev_started = 0;
-	eth_dev->data->promiscuous = 0;
-	eth_dev->data->scattered_rx = 0;
-	eth_dev->data->all_multicast = 0;
-
-	eth_dev->dev_ops = &default_dev_ops;
-	eth_dev->pci_dev = pci_dev;
-
-	rte_spinlock_init(&internals->lock);
-
-	internals->port_id = eth_dev->data->port_id;
-	internals->mode = BONDING_MODE_INVALID;
-	internals->current_primary_port = 0;
-	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
-	internals->xmit_hash = xmit_l2_hash;
-	internals->user_defined_mac = 0;
-	internals->link_props_set = 0;
-
-	internals->link_status_polling_enabled = 0;
-
-	internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS;
-	internals->link_down_delay_ms = 0;
-	internals->link_up_delay_ms = 0;
-
-	internals->slave_count = 0;
-	internals->active_slave_count = 0;
-	internals->rx_offload_capa = 0;
-	internals->tx_offload_capa = 0;
-
-	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
-	memset(internals->slaves, 0, sizeof(internals->slaves));
-
-	/* Set mode 4 default configuration */
-	bond_mode_8023ad_setup(eth_dev, NULL);
-	if (bond_ethdev_mode_set(eth_dev, mode)) {
-		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
-				eth_dev->data->port_id, mode);
-		goto err;
-	}
-
-	return eth_dev->data->port_id;
-
-err:
-	/*
-	 * rte_free(NULL) is a no-op, so pointers not yet allocated are fine.
-	 * NOTE(review): the ethdev entry reserved by rte_eth_dev_allocate() and
-	 * its mac_addrs are not released on this path - confirm whether the
-	 * entry can be handed back here.
-	 */
-	rte_free(pci_dev);
-	rte_free(pci_id_table);
-	rte_free(eth_drv);
-	rte_free(internals);
-
-	return -1;
-}
-
-/*
- * Attach slave_port_id to the bonded device bonded_port_id. The caller holds
- * the bonded device lock and has already validated bonded_port_id; only the
- * slave port id is validated here.
- *
- * Returns 0 on success, -1 on failure.
- */
-static int
-__eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
-{
- struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
- struct bond_dev_private *internals;
- struct bond_dev_private *temp_internals;
- struct rte_eth_link link_props;
- struct rte_eth_dev_info dev_info;
-
- int i, j;
-
- if (valid_slave_port_id(slave_port_id) != 0)
- return -1;
-
- bonded_eth_dev = &rte_eth_devices[bonded_port_id];
- internals = bonded_eth_dev->data->dev_private;
-
- /* Verify that new slave device is not already a slave of another
- * bonded device */
- for (i = rte_eth_dev_count()-1; i >= 0; i--) {
- if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) {
- temp_internals = rte_eth_devices[i].data->dev_private;
-
- for (j = 0; j < temp_internals->slave_count; j++) {
- /* Device already a slave of a bonded device */
- if (temp_internals->slaves[j].port_id == slave_port_id) {
- RTE_BOND_LOG(ERR, "Slave port %d is already a slave",
- slave_port_id);
- return -1;
- }
- }
- }
- }
-
- slave_eth_dev = &rte_eth_devices[slave_port_id];
-
- /* Add slave details to bonded device */
- /* NOTE(review): the error returns further down do not undo this call */
- slave_add(internals, slave_eth_dev);
-
- memset(&dev_info, 0, sizeof(dev_info));
- rte_eth_dev_info_get(slave_port_id, &dev_info);
-
- /* slave_count is only incremented further down, so it is still 0 here
- * when this is the first slave */
- if (internals->slave_count < 1) {
- /* if MAC is not user defined then use MAC of first slave add to
- * bonded device */
- if (!internals->user_defined_mac)
- mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs);
-
- /* Inherit eth dev link properties from first slave */
- link_properties_set(bonded_eth_dev,
- &(slave_eth_dev->data->dev_link));
-
- /* Make primary slave */
- internals->primary_port = slave_port_id;
-
- /* Take the first dev's offload capabilities */
- internals->rx_offload_capa = dev_info.rx_offload_capa;
- internals->tx_offload_capa = dev_info.tx_offload_capa;
-
- } else {
- /* Check slave link properties are supported if props are set,
- * all slaves must be the same */
- if (internals->link_props_set) {
- if (link_properties_valid(&(bonded_eth_dev->data->dev_link),
- &(slave_eth_dev->data->dev_link))) {
- RTE_BOND_LOG(ERR,
- "Slave port %d link speed/duplex not supported",
- slave_port_id);
- return -1;
- }
- } else {
- link_properties_set(bonded_eth_dev,
- &(slave_eth_dev->data->dev_link));
- }
- /* Bonded device only advertises offloads common to all slaves */
- internals->rx_offload_capa &= dev_info.rx_offload_capa;
- internals->tx_offload_capa &= dev_info.tx_offload_capa;
- }
-
- internals->slave_count++;
-
- /* Update all slave devices MACs*/
- mac_address_slaves_update(bonded_eth_dev);
-
- if (bonded_eth_dev->data->dev_started) {
- if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) {
- RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d",
- slave_port_id);
- return -1;
- }
- }
-
- /* Register link status change callback with bonded device pointer as
- * argument*/
- rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
- bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id);
-
- /* If bonded device is started then we can add the slave to our active
- * slave array */
- if (bonded_eth_dev->data->dev_started) {
- rte_eth_link_get_nowait(slave_port_id, &link_props);
-
- /* Only a slave whose link is up becomes active right away */
- if (link_props.link_status == 1)
- activate_slave(bonded_eth_dev, slave_port_id);
- }
- return 0;
-
-}
-
-/*
- * Public entry point for adding a slave: validates the bonded port id, then
- * performs the add while holding the bonded device lock.
- *
- * Returns 0 on success, -1 on failure.
- */
-int
-rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
-{
-	struct bond_dev_private *internals;
-	int status;
-
-	/* Only the bonded port is checked here, the slave is checked below */
-	if (valid_bonded_port_id(bonded_port_id) != 0)
-		return -1;
-
-	internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
-	rte_spinlock_lock(&internals->lock);
-	status = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id);
-	rte_spinlock_unlock(&internals->lock);
-
-	return status;
-}
-
-/*
- * Detach slave_port_id from the bonded device bonded_port_id. The caller
- * holds the bonded device lock and has already validated bonded_port_id.
- *
- * Returns 0 on success, -1 if the slave port id is invalid or the port is
- * not in the slave list of this bonded device.
- */
-static int
-__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
-{
- struct rte_eth_dev *bonded_eth_dev;
- struct bond_dev_private *internals;
-
- int i, slave_idx;
-
- if (valid_slave_port_id(slave_port_id) != 0)
- return -1;
-
- bonded_eth_dev = &rte_eth_devices[bonded_port_id];
- internals = bonded_eth_dev->data->dev_private;
-
- /* first remove from active slave list */
- slave_idx = find_slave_by_id(internals->active_slaves,
- internals->active_slave_count, slave_port_id);
-
- /* A position below the active count means the slave is currently active */
- if (slave_idx < internals->active_slave_count)
- deactivate_slave(bonded_eth_dev, slave_port_id);
-
- slave_idx = -1;
- /* now find in slave list */
- for (i = 0; i < internals->slave_count; i++)
- if (internals->slaves[i].port_id == slave_port_id) {
- slave_idx = i;
- break;
- }
-
- if (slave_idx < 0) {
- RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d",
- internals->slave_count);
- return -1;
- }
-
- /* Un-register link status change callback with bonded device pointer as
- * argument*/
- rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
- bond_ethdev_lsc_event_callback,
- &rte_eth_devices[bonded_port_id].data->port_id);
-
- /* Restore original MAC address of slave device */
- mac_address_set(&rte_eth_devices[slave_port_id],
- &(internals->slaves[slave_idx].persisted_mac_addr));
-
- slave_remove(internals, &rte_eth_devices[slave_port_id]);
-
- /* first slave in the active list will be the primary by default,
- * otherwise use first device in list */
- if (internals->current_primary_port == slave_port_id) {
- if (internals->active_slave_count > 0)
- internals->current_primary_port = internals->active_slaves[0];
- else if (internals->slave_count > 0)
- internals->current_primary_port = internals->slaves[0].port_id;
- else
- /* NOTE(review): this branch resets primary_port, not
-  * current_primary_port like the two above - confirm intent */
- internals->primary_port = 0;
- }
-
- if (internals->active_slave_count < 1) {
- /* reset device link properties as no slaves are active */
- link_properties_reset(&rte_eth_devices[bonded_port_id]);
-
- /* if no slaves are any longer attached to bonded device and MAC is not
- * user defined then clear MAC of bonded device as it will be reset
- * when a new slave is added */
- if (internals->slave_count < 1 && !internals->user_defined_mac)
- memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0,
- sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs)));
- }
- /* With no slave left there is no offload capability to advertise */
- if (internals->slave_count == 0) {
- internals->rx_offload_capa = 0;
- internals->tx_offload_capa = 0;
- }
- return 0;
-}
-
-/*
- * Public entry point for removing a slave: validates the bonded port id,
- * then performs the removal while holding the bonded device lock.
- *
- * Returns 0 on success, -1 on failure.
- */
-int
-rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
-{
-	struct bond_dev_private *internals;
-	int status;
-
-	if (valid_bonded_port_id(bonded_port_id) != 0)
-		return -1;
-
-	internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
-	rte_spinlock_lock(&internals->lock);
-	status = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id);
-	rte_spinlock_unlock(&internals->lock);
-
-	return status;
-}
-
-/*
- * Set the bonding mode of a bonded device. Returns -1 if bonded_port_id is
- * not a bonded port, otherwise the result of bond_ethdev_mode_set().
- */
-int
-rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
-{
-	struct rte_eth_dev *bonded_eth_dev;
-
-	if (valid_bonded_port_id(bonded_port_id) != 0)
-		return -1;
-
-	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
-
-	return bond_ethdev_mode_set(bonded_eth_dev, mode);
-}
-
-/*
- * Return the current bonding mode of a bonded device, or -1 if
- * bonded_port_id is not a bonded port.
- */
-int
-rte_eth_bond_mode_get(uint8_t bonded_port_id)
-{
-	const struct bond_dev_private *internals;
-
-	if (valid_bonded_port_id(bonded_port_id) != 0)
-		return -1;
-
-	internals = rte_eth_devices[bonded_port_id].data->dev_private;
-	return internals->mode;
-}
-
-int
-rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- if (valid_slave_port_id(slave_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- internals->user_defined_primary_port = 1;
- internals->primary_port = slave_port_id;
-
- bond_ethdev_primary_set(internals, slave_port_id);
-
- return 0;
-}
-
-int
-rte_eth_bond_primary_get(uint8_t bonded_port_id)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- if (internals->slave_count < 1)
- return -1;
-
- return internals->current_primary_port;
-}
-
-int
-rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
-{
- struct bond_dev_private *internals;
- uint8_t i;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- if (slaves == NULL)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- if (internals->slave_count > len)
- return -1;
-
- for (i = 0; i < internals->slave_count; i++)
- slaves[i] = internals->slaves[i].port_id;
-
- return internals->slave_count;
-}
-
-int
-rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
- uint8_t len)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- if (slaves == NULL)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- if (internals->active_slave_count > len)
- return -1;
-
- memcpy(slaves, internals->active_slaves, internals->active_slave_count);
-
- return internals->active_slave_count;
-}
-
-int
-rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
- struct ether_addr *mac_addr)
-{
- struct rte_eth_dev *bonded_eth_dev;
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- bonded_eth_dev = &rte_eth_devices[bonded_port_id];
- internals = bonded_eth_dev->data->dev_private;
-
- /* Set MAC Address of Bonded Device */
- if (mac_address_set(bonded_eth_dev, mac_addr))
- return -1;
-
- internals->user_defined_mac = 1;
-
- /* Update all slave devices MACs*/
- if (internals->slave_count > 0)
- return mac_address_slaves_update(bonded_eth_dev);
-
- return 0;
-}
-
-int
-rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
-{
- struct rte_eth_dev *bonded_eth_dev;
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- bonded_eth_dev = &rte_eth_devices[bonded_port_id];
- internals = bonded_eth_dev->data->dev_private;
-
- internals->user_defined_mac = 0;
-
- if (internals->slave_count > 0) {
- /* Set MAC Address of Bonded Device */
- if (mac_address_set(bonded_eth_dev,
- &internals->slaves[internals->primary_port].persisted_mac_addr)
- != 0) {
- RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device");
- return -1;
- }
- /* Update all slave devices MAC addresses */
- return mac_address_slaves_update(bonded_eth_dev);
- }
- /* No need to update anything as no slaves present */
- return 0;
-}
-
-int
-rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- switch (policy) {
- case BALANCE_XMIT_POLICY_LAYER2:
- internals->balance_xmit_policy = policy;
- internals->xmit_hash = xmit_l2_hash;
- break;
- case BALANCE_XMIT_POLICY_LAYER23:
- internals->balance_xmit_policy = policy;
- internals->xmit_hash = xmit_l23_hash;
- break;
- case BALANCE_XMIT_POLICY_LAYER34:
- internals->balance_xmit_policy = policy;
- internals->xmit_hash = xmit_l34_hash;
- break;
-
- default:
- return -1;
- }
- return 0;
-}
-
-int
-rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- return internals->balance_xmit_policy;
-}
-
-int
-rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
- internals->link_status_polling_interval_ms = internal_ms;
-
- return 0;
-}
-
-int
-rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- return internals->link_status_polling_interval_ms;
-}
-
-int
-rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
-
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
- internals->link_down_delay_ms = delay_ms;
-
- return 0;
-}
-
-int
-rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- return internals->link_down_delay_ms;
-}
-
-int
-rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
-
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
- internals->link_up_delay_ms = delay_ms;
-
- return 0;
-}
-
-int
-rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id)
-{
- struct bond_dev_private *internals;
-
- if (valid_bonded_port_id(bonded_port_id) != 0)
- return -1;
-
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
-
- return internals->link_up_delay_ms;
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <rte_devargs.h>
-#include <rte_kvargs.h>
-
-#include <cmdline_parse.h>
-#include <cmdline_parse_etheraddr.h>
-
-#include "rte_eth_bond.h"
-#include "rte_eth_bond_private.h"
-
-const char *pmd_bond_init_valid_arguments[] = {
- PMD_BOND_SLAVE_PORT_KVARG,
- PMD_BOND_PRIMARY_SLAVE_KVARG,
- PMD_BOND_MODE_KVARG,
- PMD_BOND_XMIT_POLICY_KVARG,
- PMD_BOND_SOCKET_ID_KVARG,
- PMD_BOND_MAC_ADDR_KVARG,
-
- NULL
-};
-
-static inline int
-find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr)
-{
- struct rte_pci_addr *eth_pci_addr;
- unsigned i;
-
- for (i = 0; i < rte_eth_dev_count(); i++) {
-
- if (rte_eth_devices[i].pci_dev == NULL)
- continue;
-
- eth_pci_addr = &(rte_eth_devices[i].pci_dev->addr);
-
- if (pci_addr->bus == eth_pci_addr->bus &&
- pci_addr->devid == eth_pci_addr->devid &&
- pci_addr->domain == eth_pci_addr->domain &&
- pci_addr->function == eth_pci_addr->function)
- return i;
- }
- return -1;
-}
-
-static inline int
-find_port_id_by_dev_name(const char *name)
-{
- unsigned i;
-
- for (i = 0; i < rte_eth_dev_count(); i++) {
- if (rte_eth_devices[i].data == NULL)
- continue;
-
- if (strcmp(rte_eth_devices[i].data->name, name) == 0)
- return i;
- }
- return -1;
-}
-
-/**
- * Parses a port identifier string to a port id by pci address, then by name,
- * and finally port id.
- */
-static inline int
-parse_port_id(const char *port_str)
-{
- struct rte_pci_addr dev_addr;
- int port_id;
-
- /* try parsing as pci address, physical devices */
- if (eal_parse_pci_DomBDF(port_str, &dev_addr) == 0) {
- port_id = find_port_id_by_pci_addr(&dev_addr);
- if (port_id < 0)
- return -1;
- } else {
- /* try parsing as device name, virtual devices */
- port_id = find_port_id_by_dev_name(port_str);
- if (port_id < 0) {
- char *end;
- errno = 0;
-
- /* try parsing as port id */
- port_id = strtol(port_str, &end, 10);
- if (*end != 0 || errno != 0)
- return -1;
- }
- }
-
- if (port_id < 0 || port_id > RTE_MAX_ETHPORTS) {
- RTE_BOND_LOG(ERR, "Slave port specified (%s) outside expected range",
- port_str);
- return -1;
- }
- return port_id;
-}
-
-int
-bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- struct bond_ethdev_slave_ports *slave_ports;
-
- if (value == NULL || extra_args == NULL)
- return -1;
-
- slave_ports = extra_args;
-
- if (strcmp(key, PMD_BOND_SLAVE_PORT_KVARG) == 0) {
- int port_id = parse_port_id(value);
- if (port_id < 0) {
- RTE_BOND_LOG(ERR, "Invalid slave port value (%s) specified", value);
- return -1;
- } else
- slave_ports->slaves[slave_ports->slave_count++] =
- (uint8_t)port_id;
- }
- return 0;
-}
-
-int
-bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- uint8_t *mode;
- char *endptr;
-
- if (value == NULL || extra_args == NULL)
- return -1;
-
- mode = extra_args;
-
- errno = 0;
- *mode = strtol(value, &endptr, 10);
- if (*endptr != 0 || errno != 0)
- return -1;
-
- /* validate mode value */
- switch (*mode) {
- case BONDING_MODE_ROUND_ROBIN:
- case BONDING_MODE_ACTIVE_BACKUP:
- case BONDING_MODE_BALANCE:
- case BONDING_MODE_BROADCAST:
- case BONDING_MODE_8023AD:
- case BONDING_MODE_TLB:
- case BONDING_MODE_ALB:
- return 0;
- default:
- RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
- return -1;
- }
-}
-
-int
-bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- int socket_id;
- char *endptr;
-
- if (value == NULL || extra_args == NULL)
- return -1;
-
- errno = 0;
- socket_id = (uint8_t)strtol(value, &endptr, 10);
- if (*endptr != 0 || errno != 0)
- return -1;
-
- /* validate mode value */
- if (socket_id >= 0 && socket_id < number_of_sockets()) {
- *(uint8_t *)extra_args = (uint8_t)socket_id;
- return 0;
- }
- return -1;
-}
-
-int
-bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- int primary_slave_port_id;
-
- if (value == NULL || extra_args == NULL)
- return -1;
-
- primary_slave_port_id = parse_port_id(value);
- if (primary_slave_port_id < 0)
- return -1;
-
- *(uint8_t *)extra_args = (uint8_t)primary_slave_port_id;
-
- return 0;
-}
-
-int
-bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- uint8_t *xmit_policy;
-
- if (value == NULL || extra_args == NULL)
- return -1;
-
- xmit_policy = extra_args;
-
- if (strcmp(PMD_BOND_XMIT_POLICY_LAYER2_KVARG, value) == 0)
- *xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
- else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER23_KVARG, value) == 0)
- *xmit_policy = BALANCE_XMIT_POLICY_LAYER23;
- else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER34_KVARG, value) == 0)
- *xmit_policy = BALANCE_XMIT_POLICY_LAYER34;
- else
- return -1;
-
- return 0;
-}
-
-int
-bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- if (value == NULL || extra_args == NULL)
- return -1;
-
- /* Parse MAC */
- return cmdline_parse_etheraddr(NULL, value, extra_args,
- sizeof(struct ether_addr));
-}
-
-int
-bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- uint32_t time_ms;
- char *endptr;
-
- if (value == NULL || extra_args == NULL)
- return -1;
-
- errno = 0;
- time_ms = (uint32_t)strtol(value, &endptr, 10);
- if (*endptr != 0 || errno != 0)
- return -1;
-
- *(uint32_t *)extra_args = time_ms;
-
- return 0;
-}
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <stdlib.h>
-#include <netinet/in.h>
-
-#include <rte_mbuf.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_tcp.h>
-#include <rte_udp.h>
-#include <rte_ip.h>
-#include <rte_devargs.h>
-#include <rte_kvargs.h>
-#include <rte_dev.h>
-#include <rte_alarm.h>
-#include <rte_cycles.h>
-
-#include "rte_eth_bond.h"
-#include "rte_eth_bond_private.h"
-#include "rte_eth_bond_8023ad_private.h"
-
-#define REORDER_PERIOD_MS 10
-
-#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
-
-/* Table for statistics in mode 5 TLB */
-static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
-
-static inline size_t
-get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
-{
- size_t vlan_offset = 0;
-
- if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
- struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
-
- vlan_offset = sizeof(struct vlan_hdr);
- *proto = vlan_hdr->eth_proto;
-
- if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
- vlan_hdr = vlan_hdr + 1;
- *proto = vlan_hdr->eth_proto;
- vlan_offset += sizeof(struct vlan_hdr);
- }
- }
- return vlan_offset;
-}
-
-static uint16_t
-bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
-
- uint16_t num_rx_slave = 0;
- uint16_t num_rx_total = 0;
-
- int i;
-
- /* Cast to structure, containing bonded device's port id and queue id */
- struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
-
- internals = bd_rx_q->dev_private;
-
-
- for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
- /* Offset of pointer to *bufs increases as packets are received
- * from other slaves */
- num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
- bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
- if (num_rx_slave) {
- num_rx_total += num_rx_slave;
- nb_pkts -= num_rx_slave;
- }
- }
-
- return num_rx_total;
-}
-
-static uint16_t
-bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
- uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
-
- /* Cast to structure, containing bonded device's port id and queue id */
- struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
-
- internals = bd_rx_q->dev_private;
-
- return rte_eth_rx_burst(internals->current_primary_port,
- bd_rx_q->queue_id, bufs, nb_pkts);
-}
-
-static uint16_t
-bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
- uint16_t nb_pkts)
-{
- /* Cast to structure, containing bonded device's port id and queue id */
- struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
- struct bond_dev_private *internals = bd_rx_q->dev_private;
- struct ether_addr bond_mac;
-
- struct ether_hdr *hdr;
-
- const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
- uint16_t num_rx_total = 0; /* Total number of received packets */
- uint8_t slaves[RTE_MAX_ETHPORTS];
- uint8_t slave_count;
-
- uint8_t collecting; /* current slave collecting status */
- const uint8_t promisc = internals->promiscuous_en;
- uint8_t i, j, k;
-
- rte_eth_macaddr_get(internals->port_id, &bond_mac);
- /* Copy slave list to protect against slave up/down changes during tx
- * bursting */
- slave_count = internals->active_slave_count;
- memcpy(slaves, internals->active_slaves,
- sizeof(internals->active_slaves[0]) * slave_count);
-
- for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
- j = num_rx_total;
- collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
-
- /* Read packets from this slave */
- num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
- &bufs[num_rx_total], nb_pkts - num_rx_total);
-
- for (k = j; k < 2 && k < num_rx_total; k++)
- rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
-
- /* Handle slow protocol packets. */
- while (j < num_rx_total) {
- if (j + 3 < num_rx_total)
- rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
-
- hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
- /* Remove packet from array if it is slow packet or slave is not
- * in collecting state or bondign interface is not in promiscus
- * mode and packet address does not match. */
- if (unlikely(hdr->ether_type == ether_type_slow_be ||
- !collecting || (!promisc &&
- !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
-
- if (hdr->ether_type == ether_type_slow_be) {
- bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
- bufs[j]);
- } else
- rte_pktmbuf_free(bufs[j]);
-
- /* Packet is managed by mode 4 or dropped, shift the array */
- num_rx_total--;
- if (j < num_rx_total) {
- memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
- (num_rx_total - j));
- }
- } else
- j++;
- }
- }
-
- return num_rx_total;
-}
-
-#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
-uint32_t burstnumberRX;
-uint32_t burstnumberTX;
-
-#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
-
-static void
-arp_op_name(uint16_t arp_op, char *buf)
-{
- switch (arp_op) {
- case ARP_OP_REQUEST:
- snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
- return;
- case ARP_OP_REPLY:
- snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
- return;
- case ARP_OP_REVREQUEST:
- snprintf(buf, sizeof("Reverse ARP Request"), "%s",
- "Reverse ARP Request");
- return;
- case ARP_OP_REVREPLY:
- snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
- "Reverse ARP Reply");
- return;
- case ARP_OP_INVREQUEST:
- snprintf(buf, sizeof("Peer Identify Request"), "%s",
- "Peer Identify Request");
- return;
- case ARP_OP_INVREPLY:
- snprintf(buf, sizeof("Peer Identify Reply"), "%s",
- "Peer Identify Reply");
- return;
- default:
- break;
- }
- snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
- return;
-}
-#endif
-#define MaxIPv4String 16
-static void
-ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
-{
- uint32_t ipv4_addr;
-
- ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
- snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
- (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
- ipv4_addr & 0xFF);
-}
-
-#define MAX_CLIENTS_NUMBER 128
-uint8_t active_clients;
-struct client_stats_t {
- uint8_t port;
- uint32_t ipv4_addr;
- uint32_t ipv4_rx_packets;
- uint32_t ipv4_tx_packets;
-};
-struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
-
-static void
-update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
-{
- int i = 0;
-
- for (; i < MAX_CLIENTS_NUMBER; i++) {
- if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
- /* Just update RX packets number for this client */
- if (TXorRXindicator == &burstnumberRX)
- client_stats[i].ipv4_rx_packets++;
- else
- client_stats[i].ipv4_tx_packets++;
- return;
- }
- }
- /* We have a new client. Insert him to the table, and increment stats */
- if (TXorRXindicator == &burstnumberRX)
- client_stats[active_clients].ipv4_rx_packets++;
- else
- client_stats[active_clients].ipv4_tx_packets++;
- client_stats[active_clients].ipv4_addr = addr;
- client_stats[active_clients].port = port;
- active_clients++;
-
-}
-
-#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
-#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
- RTE_LOG(DEBUG, PMD, \
- "%s " \
- "port:%d " \
- "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
- "SrcIP:%s " \
- "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
- "DstIP:%s " \
- "%s " \
- "%d\n", \
- info, \
- port, \
- eth_h->s_addr.addr_bytes[0], \
- eth_h->s_addr.addr_bytes[1], \
- eth_h->s_addr.addr_bytes[2], \
- eth_h->s_addr.addr_bytes[3], \
- eth_h->s_addr.addr_bytes[4], \
- eth_h->s_addr.addr_bytes[5], \
- src_ip, \
- eth_h->d_addr.addr_bytes[0], \
- eth_h->d_addr.addr_bytes[1], \
- eth_h->d_addr.addr_bytes[2], \
- eth_h->d_addr.addr_bytes[3], \
- eth_h->d_addr.addr_bytes[4], \
- eth_h->d_addr.addr_bytes[5], \
- dst_ip, \
- arp_op, \
- ++burstnumber)
-#endif
-
-static void
-mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
- uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
-{
- struct ipv4_hdr *ipv4_h;
-#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
- struct arp_hdr *arp_h;
- char dst_ip[16];
- char ArpOp[24];
- char buf[16];
-#endif
- char src_ip[16];
-
- uint16_t ether_type = eth_h->ether_type;
- uint16_t offset = get_vlan_offset(eth_h, &ether_type);
-
-#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
- snprintf(buf, 16, "%s", info);
-#endif
-
- if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
- ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
- ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
-#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
- ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
- MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
-#endif
- update_client_stats(ipv4_h->src_addr, port, burstnumber);
- }
-#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
- else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
- arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
- ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
- ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
- arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
- MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
- }
-#endif
-}
-#endif
-
-static uint16_t
-bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-{
- struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
- struct bond_dev_private *internals = bd_tx_q->dev_private;
- struct ether_hdr *eth_h;
- uint16_t ether_type, offset;
- uint16_t nb_recv_pkts;
- int i;
-
- nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
-
- for (i = 0; i < nb_recv_pkts; i++) {
- eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
- ether_type = eth_h->ether_type;
- offset = get_vlan_offset(eth_h, &ether_type);
-
- if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
-#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
- mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
-#endif
- bond_mode_alb_arp_recv(eth_h, offset, internals);
- }
-#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
- else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
- mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
-#endif
- }
-
- return nb_recv_pkts;
-}
-
-static uint16_t
-bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
- uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
- struct bond_tx_queue *bd_tx_q;
-
- struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
- uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
-
- uint16_t num_tx_total = 0, num_tx_slave;
-
- static int slave_idx = 0;
- int i, cslave_idx = 0, tx_fail_total = 0;
-
- bd_tx_q = (struct bond_tx_queue *)queue;
- internals = bd_tx_q->dev_private;
-
- /* Copy slave list to protect against slave up/down changes during tx
- * bursting */
- num_of_slaves = internals->active_slave_count;
- memcpy(slaves, internals->active_slaves,
- sizeof(internals->active_slaves[0]) * num_of_slaves);
-
- if (num_of_slaves < 1)
- return num_tx_total;
-
- /* Populate slaves mbuf with which packets are to be sent on it */
- for (i = 0; i < nb_pkts; i++) {
- cslave_idx = (slave_idx + i) % num_of_slaves;
- slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
- }
-
- /* increment current slave index so the next call to tx burst starts on the
- * next slave */
- slave_idx = ++cslave_idx;
-
- /* Send packet burst on each slave device */
- for (i = 0; i < num_of_slaves; i++) {
- if (slave_nb_pkts[i] > 0) {
- num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
- slave_bufs[i], slave_nb_pkts[i]);
-
- /* if tx burst fails move packets to end of bufs */
- if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
- int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
-
- tx_fail_total += tx_fail_slave;
-
- memcpy(&bufs[nb_pkts - tx_fail_total],
- &slave_bufs[i][num_tx_slave],
- tx_fail_slave * sizeof(bufs[0]));
- }
- num_tx_total += num_tx_slave;
- }
- }
-
- return num_tx_total;
-}
-
-static uint16_t
-bond_ethdev_tx_burst_active_backup(void *queue,
- struct rte_mbuf **bufs, uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
- struct bond_tx_queue *bd_tx_q;
-
- bd_tx_q = (struct bond_tx_queue *)queue;
- internals = bd_tx_q->dev_private;
-
- if (internals->active_slave_count < 1)
- return 0;
-
- return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
- bufs, nb_pkts);
-}
-
-static inline uint16_t
-ether_hash(struct ether_hdr *eth_hdr)
-{
- uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes;
- uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes;
-
- return (word_src_addr[0] ^ word_dst_addr[0]) ^
- (word_src_addr[1] ^ word_dst_addr[1]) ^
- (word_src_addr[2] ^ word_dst_addr[2]);
-}
-
-static inline uint32_t
-ipv4_hash(struct ipv4_hdr *ipv4_hdr)
-{
- return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
-}
-
-static inline uint32_t
-ipv6_hash(struct ipv6_hdr *ipv6_hdr)
-{
- uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]);
- uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
-
- return (word_src_addr[0] ^ word_dst_addr[0]) ^
- (word_src_addr[1] ^ word_dst_addr[1]) ^
- (word_src_addr[2] ^ word_dst_addr[2]) ^
- (word_src_addr[3] ^ word_dst_addr[3]);
-}
-
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
-{
- struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-
- uint32_t hash = ether_hash(eth_hdr);
-
- return (hash ^= hash >> 8) % slave_count;
-}
-
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
-{
- struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
- uint16_t proto = eth_hdr->ether_type;
- size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
- uint32_t hash, l3hash = 0;
-
- hash = ether_hash(eth_hdr);
-
- if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
- struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
- ((char *)(eth_hdr + 1) + vlan_offset);
- l3hash = ipv4_hash(ipv4_hdr);
-
- } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
- struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
- ((char *)(eth_hdr + 1) + vlan_offset);
- l3hash = ipv6_hash(ipv6_hdr);
- }
-
- hash = hash ^ l3hash;
- hash ^= hash >> 16;
- hash ^= hash >> 8;
-
- return hash % slave_count;
-}
-
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
-{
- struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
- uint16_t proto = eth_hdr->ether_type;
- size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
-
- struct udp_hdr *udp_hdr = NULL;
- struct tcp_hdr *tcp_hdr = NULL;
- uint32_t hash, l3hash = 0, l4hash = 0;
-
- if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
- struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
- ((char *)(eth_hdr + 1) + vlan_offset);
- size_t ip_hdr_offset;
-
- l3hash = ipv4_hash(ipv4_hdr);
-
- ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
- IPV4_IHL_MULTIPLIER;
-
- if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
- tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
- ip_hdr_offset);
- l4hash = HASH_L4_PORTS(tcp_hdr);
- } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
- udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
- ip_hdr_offset);
- l4hash = HASH_L4_PORTS(udp_hdr);
- }
- } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
- struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
- ((char *)(eth_hdr + 1) + vlan_offset);
- l3hash = ipv6_hash(ipv6_hdr);
-
- if (ipv6_hdr->proto == IPPROTO_TCP) {
- tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
- l4hash = HASH_L4_PORTS(tcp_hdr);
- } else if (ipv6_hdr->proto == IPPROTO_UDP) {
- udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
- l4hash = HASH_L4_PORTS(udp_hdr);
- }
- }
-
- hash = l3hash ^ l4hash;
- hash ^= hash >> 16;
- hash ^= hash >> 8;
-
- return hash % slave_count;
-}
-
-struct bwg_slave {
- uint64_t bwg_left_int;
- uint64_t bwg_left_remainder;
- uint8_t slave;
-};
-
-void
-bond_tlb_activate_slave(struct bond_dev_private *internals) {
- int i;
-
- for (i = 0; i < internals->active_slave_count; i++) {
- tlb_last_obytets[internals->active_slaves[i]] = 0;
- }
-}
-
-static int
-bandwidth_cmp(const void *a, const void *b)
-{
- const struct bwg_slave *bwg_a = a;
- const struct bwg_slave *bwg_b = b;
- int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
- int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
- (int64_t)bwg_a->bwg_left_remainder;
- if (diff > 0)
- return 1;
- else if (diff < 0)
- return -1;
- else if (diff2 > 0)
- return 1;
- else if (diff2 < 0)
- return -1;
- else
- return 0;
-}
-
-static void
-bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
- struct bwg_slave *bwg_slave)
-{
- struct rte_eth_link link_status;
-
- rte_eth_link_get(port_id, &link_status);
- uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
- if (link_bwg == 0)
- return;
- link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
- bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
- bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
-}
-
-static void
-bond_ethdev_update_tlb_slave_cb(void *arg)
-{
- struct bond_dev_private *internals = arg;
- struct rte_eth_stats slave_stats;
- struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
- uint8_t slave_count;
- uint64_t tx_bytes;
-
- uint8_t update_stats = 0;
- uint8_t i, slave_id;
-
- internals->slave_update_idx++;
-
-
- if (internals->slave_update_idx >= REORDER_PERIOD_MS)
- update_stats = 1;
-
- for (i = 0; i < internals->active_slave_count; i++) {
- slave_id = internals->active_slaves[i];
- rte_eth_stats_get(slave_id, &slave_stats);
- tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
- bandwidth_left(slave_id, tx_bytes,
- internals->slave_update_idx, &bwg_array[i]);
- bwg_array[i].slave = slave_id;
-
- if (update_stats) {
- tlb_last_obytets[slave_id] = slave_stats.obytes;
- }
- }
-
- if (update_stats == 1)
- internals->slave_update_idx = 0;
-
- slave_count = i;
- qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
- for (i = 0; i < slave_count; i++)
- internals->tlb_slaves_order[i] = bwg_array[i].slave;
-
- rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
- (struct bond_dev_private *)internals);
-}
-
-static uint16_t
-bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-{
- struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
- struct bond_dev_private *internals = bd_tx_q->dev_private;
-
- struct rte_eth_dev *primary_port =
- &rte_eth_devices[internals->primary_port];
- uint16_t num_tx_total = 0;
- uint8_t i, j;
-
- uint8_t num_of_slaves = internals->active_slave_count;
- uint8_t slaves[RTE_MAX_ETHPORTS];
-
- struct ether_hdr *ether_hdr;
- struct ether_addr primary_slave_addr;
- struct ether_addr active_slave_addr;
-
- if (num_of_slaves < 1)
- return num_tx_total;
-
- memcpy(slaves, internals->tlb_slaves_order,
- sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
-
-
- ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
-
- if (nb_pkts > 3) {
- for (i = 0; i < 3; i++)
- rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
- }
-
- for (i = 0; i < num_of_slaves; i++) {
- rte_eth_macaddr_get(slaves[i], &active_slave_addr);
- for (j = num_tx_total; j < nb_pkts; j++) {
- if (j + 3 < nb_pkts)
- rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
-
- ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
- if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
- ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
-#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
- mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
-#endif
- }
-
- num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
- bufs + num_tx_total, nb_pkts - num_tx_total);
-
- if (num_tx_total == nb_pkts)
- break;
- }
-
- return num_tx_total;
-}
-
-void
-bond_tlb_disable(struct bond_dev_private *internals)
-{
- rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
-}
-
-void
-bond_tlb_enable(struct bond_dev_private *internals)
-{
- bond_ethdev_update_tlb_slave_cb(internals);
-}
-
-static uint16_t
-bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-{
- struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
- struct bond_dev_private *internals = bd_tx_q->dev_private;
-
- struct ether_hdr *eth_h;
- uint16_t ether_type, offset;
-
- struct client_data *client_info;
-
- /*
- * We create transmit buffers for every slave and one additional to send
- * through tlb. In worst case every packet will be send on one port.
- */
- struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
- uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
-
- /*
- * We create separate transmit buffers for update packets as they wont be
- * counted in num_tx_total.
- */
- struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
- uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
-
- struct rte_mbuf *upd_pkt;
- size_t pkt_size;
-
- uint16_t num_send, num_not_send = 0;
- uint16_t num_tx_total = 0;
- uint8_t slave_idx;
-
- int i, j;
-
- /* Search tx buffer for ARP packets and forward them to alb */
- for (i = 0; i < nb_pkts; i++) {
- eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
- ether_type = eth_h->ether_type;
- offset = get_vlan_offset(eth_h, &ether_type);
-
- if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
- slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
-
- /* Change src mac in eth header */
- rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
-
- /* Add packet to slave tx buffer */
- slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
- slave_bufs_pkts[slave_idx]++;
- } else {
- /* If packet is not ARP, send it with TLB policy */
- slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
- bufs[i];
- slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
- }
- }
-
- /* Update connected client ARP tables */
- if (internals->mode6.ntt) {
- for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
- client_info = &internals->mode6.client_table[i];
-
- if (client_info->in_use) {
- /* Allocate new packet to send ARP update on current slave */
- upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
- if (upd_pkt == NULL) {
- RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
- continue;
- }
- pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
- + client_info->vlan_count * sizeof(struct vlan_hdr);
- upd_pkt->data_len = pkt_size;
- upd_pkt->pkt_len = pkt_size;
-
- slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
- internals);
-
- /* Add packet to update tx buffer */
- update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
- update_bufs_pkts[slave_idx]++;
- }
- }
- internals->mode6.ntt = 0;
- }
-
- /* Send ARP packets on proper slaves */
- for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
- if (slave_bufs_pkts[i] > 0) {
- num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
- slave_bufs[i], slave_bufs_pkts[i]);
- for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
- bufs[nb_pkts - 1 - num_not_send - j] =
- slave_bufs[i][nb_pkts - 1 - j];
- }
-
- num_tx_total += num_send;
- num_not_send += slave_bufs_pkts[i] - num_send;
-
-#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
- /* Print TX stats including update packets */
- for (j = 0; j < slave_bufs_pkts[i]; j++) {
- eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
- mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
- }
-#endif
- }
- }
-
- /* Send update packets on proper slaves */
- for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
- if (update_bufs_pkts[i] > 0) {
- num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
- update_bufs_pkts[i]);
- for (j = num_send; j < update_bufs_pkts[i]; j++) {
- rte_pktmbuf_free(update_bufs[i][j]);
- }
-#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
- for (j = 0; j < update_bufs_pkts[i]; j++) {
- eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
- mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
- }
-#endif
- }
- }
-
- /* Send non-ARP packets using tlb policy */
- if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
- num_send = bond_ethdev_tx_burst_tlb(queue,
- slave_bufs[RTE_MAX_ETHPORTS],
- slave_bufs_pkts[RTE_MAX_ETHPORTS]);
-
- for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
- bufs[nb_pkts - 1 - num_not_send - j] =
- slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
- }
-
- num_tx_total += num_send;
- num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
- }
-
- return num_tx_total;
-}
-
-static uint16_t
-bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
- uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
- struct bond_tx_queue *bd_tx_q;
-
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
-
- uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
-
- int i, op_slave_id;
-
- struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
- uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-
- bd_tx_q = (struct bond_tx_queue *)queue;
- internals = bd_tx_q->dev_private;
-
- /* Copy slave list to protect against slave up/down changes during tx
- * bursting */
- num_of_slaves = internals->active_slave_count;
- memcpy(slaves, internals->active_slaves,
- sizeof(internals->active_slaves[0]) * num_of_slaves);
-
- if (num_of_slaves < 1)
- return num_tx_total;
-
- /* Populate slaves mbuf with the packets which are to be sent on it */
- for (i = 0; i < nb_pkts; i++) {
- /* Select output slave using hash based on xmit policy */
- op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
-
- /* Populate slave mbuf arrays with mbufs for that slave */
- slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
- }
-
- /* Send packet burst on each slave device */
- for (i = 0; i < num_of_slaves; i++) {
- if (slave_nb_pkts[i] > 0) {
- num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
- slave_bufs[i], slave_nb_pkts[i]);
-
- /* if tx burst fails move packets to end of bufs */
- if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
- int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
-
- tx_fail_total += slave_tx_fail_count;
- memcpy(&bufs[nb_pkts - tx_fail_total],
- &slave_bufs[i][num_tx_slave],
- slave_tx_fail_count * sizeof(bufs[0]));
- }
-
- num_tx_total += num_tx_slave;
- }
- }
-
- return num_tx_total;
-}
-
-static uint16_t
-bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
- uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
- struct bond_tx_queue *bd_tx_q;
-
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
- /* positions in slaves, not ID */
- uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
- uint8_t distributing_count;
-
- uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
- uint16_t i, j, op_slave_idx;
- const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
-
- /* Allocate additional packets in case 8023AD mode. */
- struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
- void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
-
- /* Total amount of packets in slave_bufs */
- uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
- /* Slow packets placed in each slave */
- uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-
- bd_tx_q = (struct bond_tx_queue *)queue;
- internals = bd_tx_q->dev_private;
-
- /* Copy slave list to protect against slave up/down changes during tx
- * bursting */
- num_of_slaves = internals->active_slave_count;
- if (num_of_slaves < 1)
- return num_tx_total;
-
- memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
-
- distributing_count = 0;
- for (i = 0; i < num_of_slaves; i++) {
- struct port *port = &mode_8023ad_ports[slaves[i]];
-
- slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
- slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
- slave_nb_pkts[i] = slave_slow_nb_pkts[i];
-
- for (j = 0; j < slave_slow_nb_pkts[i]; j++)
- slave_bufs[i][j] = slow_pkts[j];
-
- if (ACTOR_STATE(port, DISTRIBUTING))
- distributing_offsets[distributing_count++] = i;
- }
-
- if (likely(distributing_count > 0)) {
- /* Populate slaves mbuf with the packets which are to be sent on it */
- for (i = 0; i < nb_pkts; i++) {
- /* Select output slave using hash based on xmit policy */
- op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
-
- /* Populate slave mbuf arrays with mbufs for that slave. Use only
- * slaves that are currently distributing. */
- uint8_t slave_offset = distributing_offsets[op_slave_idx];
- slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
- slave_nb_pkts[slave_offset]++;
- }
- }
-
- /* Send packet burst on each slave device */
- for (i = 0; i < num_of_slaves; i++) {
- if (slave_nb_pkts[i] == 0)
- continue;
-
- num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
- slave_bufs[i], slave_nb_pkts[i]);
-
- /* If tx burst fails drop slow packets */
- for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
- rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
-
- num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
- num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
-
- /* If tx burst fails move packets to end of bufs */
- if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
- uint16_t j = nb_pkts - num_tx_fail_total;
- for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
- bufs[j] = slave_bufs[i][num_tx_slave];
- }
- }
-
- return num_tx_total;
-}
-
-static uint16_t
-bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
- uint16_t nb_pkts)
-{
- struct bond_dev_private *internals;
- struct bond_tx_queue *bd_tx_q;
-
- uint8_t tx_failed_flag = 0, num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
-
- uint16_t max_nb_of_tx_pkts = 0;
-
- int slave_tx_total[RTE_MAX_ETHPORTS];
- int i, most_successful_tx_slave = -1;
-
- bd_tx_q = (struct bond_tx_queue *)queue;
- internals = bd_tx_q->dev_private;
-
- /* Copy slave list to protect against slave up/down changes during tx
- * bursting */
- num_of_slaves = internals->active_slave_count;
- memcpy(slaves, internals->active_slaves,
- sizeof(internals->active_slaves[0]) * num_of_slaves);
-
- if (num_of_slaves < 1)
- return 0;
-
- /* Increment reference count on mbufs */
- for (i = 0; i < nb_pkts; i++)
- rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
-
- /* Transmit burst on each active slave */
- for (i = 0; i < num_of_slaves; i++) {
- slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
- bufs, nb_pkts);
-
- if (unlikely(slave_tx_total[i] < nb_pkts))
- tx_failed_flag = 1;
-
- /* record the value and slave index for the slave which transmits the
- * maximum number of packets */
- if (slave_tx_total[i] > max_nb_of_tx_pkts) {
- max_nb_of_tx_pkts = slave_tx_total[i];
- most_successful_tx_slave = i;
- }
- }
-
- /* if slaves fail to transmit packets from burst, the calling application
- * is not expected to know about multiple references to packets so we must
- * handle failures of all packets except those of the most successful slave
- */
- if (unlikely(tx_failed_flag))
- for (i = 0; i < num_of_slaves; i++)
- if (i != most_successful_tx_slave)
- while (slave_tx_total[i] < nb_pkts)
- rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
-
- return max_nb_of_tx_pkts;
-}
-
-void
-link_properties_set(struct rte_eth_dev *bonded_eth_dev,
- struct rte_eth_link *slave_dev_link)
-{
- struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
- struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
-
- if (slave_dev_link->link_status &&
- bonded_eth_dev->data->dev_started) {
- bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
- bonded_dev_link->link_speed = slave_dev_link->link_speed;
-
- internals->link_props_set = 1;
- }
-}
-
-void
-link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
-{
- struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
-
- memset(&(bonded_eth_dev->data->dev_link), 0,
- sizeof(bonded_eth_dev->data->dev_link));
-
- internals->link_props_set = 0;
-}
-
-int
-link_properties_valid(struct rte_eth_link *bonded_dev_link,
- struct rte_eth_link *slave_dev_link)
-{
- if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
- bonded_dev_link->link_speed != slave_dev_link->link_speed)
- return -1;
-
- return 0;
-}
-
-int
-mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
-{
- struct ether_addr *mac_addr;
-
- if (eth_dev == NULL) {
- RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
- return -1;
- }
-
- if (dst_mac_addr == NULL) {
- RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
- return -1;
- }
-
- mac_addr = eth_dev->data->mac_addrs;
-
- ether_addr_copy(mac_addr, dst_mac_addr);
- return 0;
-}
-
-int
-mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
-{
- struct ether_addr *mac_addr;
-
- if (eth_dev == NULL) {
- RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
- return -1;
- }
-
- if (new_mac_addr == NULL) {
- RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
- return -1;
- }
-
- mac_addr = eth_dev->data->mac_addrs;
-
- /* If new MAC is different to current MAC then update */
- if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
- memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
-
- return 0;
-}
-
-int
-mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
-{
- struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
- int i;
-
- /* Update slave devices MAC addresses */
- if (internals->slave_count < 1)
- return -1;
-
- switch (internals->mode) {
- case BONDING_MODE_ROUND_ROBIN:
- case BONDING_MODE_BALANCE:
- case BONDING_MODE_BROADCAST:
- for (i = 0; i < internals->slave_count; i++) {
- if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
- bonded_eth_dev->data->mac_addrs)) {
- RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
- internals->slaves[i].port_id);
- return -1;
- }
- }
- break;
- case BONDING_MODE_8023AD:
- bond_mode_8023ad_mac_address_update(bonded_eth_dev);
- break;
- case BONDING_MODE_ACTIVE_BACKUP:
- case BONDING_MODE_TLB:
- case BONDING_MODE_ALB:
- default:
- for (i = 0; i < internals->slave_count; i++) {
- if (internals->slaves[i].port_id ==
- internals->current_primary_port) {
- if (mac_address_set(&rte_eth_devices[internals->primary_port],
- bonded_eth_dev->data->mac_addrs)) {
- RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
- internals->current_primary_port);
- return -1;
- }
- } else {
- if (mac_address_set(
- &rte_eth_devices[internals->slaves[i].port_id],
- &internals->slaves[i].persisted_mac_addr)) {
- RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
- internals->slaves[i].port_id);
- return -1;
- }
- }
- }
- }
-
- return 0;
-}
-
-int
-bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
-{
- struct bond_dev_private *internals;
-
- internals = eth_dev->data->dev_private;
-
- switch (mode) {
- case BONDING_MODE_ROUND_ROBIN:
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
- break;
- case BONDING_MODE_ACTIVE_BACKUP:
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
- break;
- case BONDING_MODE_BALANCE:
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
- break;
- case BONDING_MODE_BROADCAST:
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
- break;
- case BONDING_MODE_8023AD:
- if (bond_mode_8023ad_enable(eth_dev) != 0)
- return -1;
-
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
- RTE_LOG(WARNING, PMD,
- "Using mode 4, it is necessary to do TX burst and RX burst "
- "at least every 100ms.\n");
- break;
- case BONDING_MODE_TLB:
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
- break;
- case BONDING_MODE_ALB:
- if (bond_mode_alb_enable(eth_dev) != 0)
- return -1;
-
- eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
- eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
- break;
- default:
- return -1;
- }
-
- internals->mode = mode;
-
- return 0;
-}
-
-int
-slave_configure(struct rte_eth_dev *bonded_eth_dev,
- struct rte_eth_dev *slave_eth_dev)
-{
- struct bond_rx_queue *bd_rx_q;
- struct bond_tx_queue *bd_tx_q;
-
- int errval;
- uint16_t q_id;
-
- /* Stop slave */
- rte_eth_dev_stop(slave_eth_dev->data->port_id);
-
- /* Enable interrupts on slave device if supported */
- if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
- slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
-
- /* Configure device */
- errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
- bonded_eth_dev->data->nb_rx_queues,
- bonded_eth_dev->data->nb_tx_queues,
- &(slave_eth_dev->data->dev_conf));
- if (errval != 0) {
- RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
- slave_eth_dev->data->port_id, errval);
- return errval;
- }
-
- /* Setup Rx Queues */
- for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
- bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
-
- errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
- bd_rx_q->nb_rx_desc,
- rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
- &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
- if (errval != 0) {
- RTE_BOND_LOG(ERR,
- "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
- slave_eth_dev->data->port_id, q_id, errval);
- return errval;
- }
- }
-
- /* Setup Tx Queues */
- for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
- bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
-
- errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
- bd_tx_q->nb_tx_desc,
- rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
- &bd_tx_q->tx_conf);
- if (errval != 0) {
- RTE_BOND_LOG(ERR,
- "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
- slave_eth_dev->data->port_id, q_id, errval);
- return errval;
- }
- }
-
- /* Start device */
- errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
- if (errval != 0) {
- RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
- slave_eth_dev->data->port_id, errval);
- return -1;
- }
-
- return 0;
-}
-
-void
-slave_remove(struct bond_dev_private *internals,
- struct rte_eth_dev *slave_eth_dev)
-{
- uint8_t i;
-
- for (i = 0; i < internals->slave_count; i++)
- if (internals->slaves[i].port_id ==
- slave_eth_dev->data->port_id)
- break;
-
- if (i < (internals->slave_count - 1))
- memmove(&internals->slaves[i], &internals->slaves[i + 1],
- sizeof(internals->slaves[0]) *
- (internals->slave_count - i - 1));
-
- internals->slave_count--;
-}
-
-static void
-bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
-
-void
-slave_add(struct bond_dev_private *internals,
- struct rte_eth_dev *slave_eth_dev)
-{
- struct bond_slave_details *slave_details =
- &internals->slaves[internals->slave_count];
-
- slave_details->port_id = slave_eth_dev->data->port_id;
- slave_details->last_link_status = 0;
-
- /* If slave device doesn't support interrupts then we need to enabled
- * polling to monitor link status */
- if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
- slave_details->link_status_poll_enabled = 1;
-
- if (!internals->link_status_polling_enabled) {
- internals->link_status_polling_enabled = 1;
-
- rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
- bond_ethdev_slave_link_status_change_monitor,
- (void *)&rte_eth_devices[internals->port_id]);
- }
- }
-
- slave_details->link_status_wait_to_complete = 0;
- /* clean tlb_last_obytes when adding port for bonding device */
- memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
- sizeof(struct ether_addr));
-}
-
-void
-bond_ethdev_primary_set(struct bond_dev_private *internals,
- uint8_t slave_port_id)
-{
- int i;
-
- if (internals->active_slave_count < 1)
- internals->current_primary_port = slave_port_id;
- else
- /* Search bonded device slave ports for new proposed primary port */
- for (i = 0; i < internals->active_slave_count; i++) {
- if (internals->active_slaves[i] == slave_port_id)
- internals->current_primary_port = slave_port_id;
- }
-}
-
-static void
-bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
-
-static int
-bond_ethdev_start(struct rte_eth_dev *eth_dev)
-{
- struct bond_dev_private *internals;
- int i;
-
- /* slave eth dev will be started by bonded device */
- if (valid_bonded_ethdev(eth_dev)) {
- RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
- eth_dev->data->port_id);
- return -1;
- }
-
- eth_dev->data->dev_link.link_status = 0;
- eth_dev->data->dev_started = 1;
-
- internals = eth_dev->data->dev_private;
-
- if (internals->slave_count == 0) {
- RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
- return -1;
- }
-
- if (internals->user_defined_mac == 0) {
- struct ether_addr *new_mac_addr = NULL;
-
- for (i = 0; i < internals->slave_count; i++)
- if (internals->slaves[i].port_id == internals->primary_port)
- new_mac_addr = &internals->slaves[i].persisted_mac_addr;
-
- if (new_mac_addr == NULL)
- return -1;
-
- if (mac_address_set(eth_dev, new_mac_addr) != 0) {
- RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
- eth_dev->data->port_id);
- return -1;
- }
- }
-
- /* Update all slave devices MACs*/
- if (mac_address_slaves_update(eth_dev) != 0)
- return -1;
-
- /* If bonded device is configure in promiscuous mode then re-apply config */
- if (internals->promiscuous_en)
- bond_ethdev_promiscuous_enable(eth_dev);
-
- /* Reconfigure each slave device if starting bonded device */
- for (i = 0; i < internals->slave_count; i++) {
- if (slave_configure(eth_dev,
- &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
- RTE_BOND_LOG(ERR,
- "bonded port (%d) failed to reconfigure slave device (%d)",
- eth_dev->data->port_id, internals->slaves[i].port_id);
- return -1;
- }
- }
-
- if (internals->user_defined_primary_port)
- bond_ethdev_primary_set(internals, internals->primary_port);
-
- if (internals->mode == BONDING_MODE_8023AD)
- bond_mode_8023ad_start(eth_dev);
-
- if (internals->mode == BONDING_MODE_TLB ||
- internals->mode == BONDING_MODE_ALB)
- bond_tlb_enable(internals);
-
- return 0;
-}
-
-static void
-bond_ethdev_stop(struct rte_eth_dev *eth_dev)
-{
- struct bond_dev_private *internals = eth_dev->data->dev_private;
- uint8_t i;
-
- if (internals->mode == BONDING_MODE_8023AD) {
- struct port *port;
- void *pkt = NULL;
-
- bond_mode_8023ad_stop(eth_dev);
-
- /* Discard all messages to/from mode 4 state machines */
- for (i = 0; i < internals->slave_count; i++) {
- port = &mode_8023ad_ports[internals->slaves[i].port_id];
-
- RTE_VERIFY(port->rx_ring != NULL);
- while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
- rte_pktmbuf_free(pkt);
-
- RTE_VERIFY(port->tx_ring != NULL);
- while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
- rte_pktmbuf_free(pkt);
- }
- }
-
- if (internals->mode == BONDING_MODE_TLB ||
- internals->mode == BONDING_MODE_ALB) {
- bond_tlb_disable(internals);
- for (i = 0; i < internals->active_slave_count; i++)
- tlb_last_obytets[internals->active_slaves[i]] = 0;
- }
-
- internals->active_slave_count = 0;
- internals->link_status_polling_enabled = 0;
-
- eth_dev->data->dev_link.link_status = 0;
- eth_dev->data->dev_started = 0;
-}
-
-static void
-bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
-{
-}
-
-/* forward declaration */
-static int bond_ethdev_configure(struct rte_eth_dev *dev);
-
-static void
-bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
-{
- struct bond_dev_private *internals = dev->data->dev_private;
-
- dev_info->driver_name = driver_name;
- dev_info->max_mac_addrs = 1;
-
- dev_info->max_rx_pktlen = (uint32_t)2048;
-
- dev_info->max_rx_queues = (uint16_t)128;
- dev_info->max_tx_queues = (uint16_t)512;
-
- dev_info->min_rx_bufsize = 0;
- dev_info->pci_dev = dev->pci_dev;
-
- dev_info->rx_offload_capa = internals->rx_offload_capa;
- dev_info->tx_offload_capa = internals->tx_offload_capa;
-}
-
-static int
-bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
- uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
- const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
-{
- struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
- rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
- 0, dev->pci_dev->numa_node);
- if (bd_rx_q == NULL)
- return -1;
-
- bd_rx_q->queue_id = rx_queue_id;
- bd_rx_q->dev_private = dev->data->dev_private;
-
- bd_rx_q->nb_rx_desc = nb_rx_desc;
-
- memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
- bd_rx_q->mb_pool = mb_pool;
-
- dev->data->rx_queues[rx_queue_id] = bd_rx_q;
-
- return 0;
-}
-
-static int
-bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
- uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
- const struct rte_eth_txconf *tx_conf)
-{
- struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
- rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
- 0, dev->pci_dev->numa_node);
-
- if (bd_tx_q == NULL)
- return -1;
-
- bd_tx_q->queue_id = tx_queue_id;
- bd_tx_q->dev_private = dev->data->dev_private;
-
- bd_tx_q->nb_tx_desc = nb_tx_desc;
- memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
-
- dev->data->tx_queues[tx_queue_id] = bd_tx_q;
-
- return 0;
-}
-
-static void
-bond_ethdev_rx_queue_release(void *queue)
-{
- if (queue == NULL)
- return;
-
- rte_free(queue);
-}
-
-static void
-bond_ethdev_tx_queue_release(void *queue)
-{
- if (queue == NULL)
- return;
-
- rte_free(queue);
-}
-
-static void
-bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
-{
- struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
- struct bond_dev_private *internals;
-
- /* Default value for polling slave found is true as we don't want to
- * disable the polling thread if we cannot get the lock */
- int i, polling_slave_found = 1;
-
- if (cb_arg == NULL)
- return;
-
- bonded_ethdev = (struct rte_eth_dev *)cb_arg;
- internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
-
- if (!bonded_ethdev->data->dev_started ||
- !internals->link_status_polling_enabled)
- return;
-
- /* If device is currently being configured then don't check slaves link
- * status, wait until next period */
- if (rte_spinlock_trylock(&internals->lock)) {
- if (internals->slave_count > 0)
- polling_slave_found = 0;
-
- for (i = 0; i < internals->slave_count; i++) {
- if (!internals->slaves[i].link_status_poll_enabled)
- continue;
-
- slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
- polling_slave_found = 1;
-
- /* Update slave link status */
- (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
- internals->slaves[i].link_status_wait_to_complete);
-
- /* if link status has changed since last checked then call lsc
- * event callback */
- if (slave_ethdev->data->dev_link.link_status !=
- internals->slaves[i].last_link_status) {
- internals->slaves[i].last_link_status =
- slave_ethdev->data->dev_link.link_status;
-
- bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
- RTE_ETH_EVENT_INTR_LSC,
- &bonded_ethdev->data->port_id);
- }
- }
- rte_spinlock_unlock(&internals->lock);
- }
-
- if (polling_slave_found)
- /* Set alarm to continue monitoring link status of slave ethdev's */
- rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
- bond_ethdev_slave_link_status_change_monitor, cb_arg);
-}
-
-static int
-bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
- int wait_to_complete)
-{
- struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
-
- if (!bonded_eth_dev->data->dev_started ||
- internals->active_slave_count == 0) {
- bonded_eth_dev->data->dev_link.link_status = 0;
- return 0;
- } else {
- struct rte_eth_dev *slave_eth_dev;
- int i, link_up = 0;
-
- for (i = 0; i < internals->active_slave_count; i++) {
- slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
-
- (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
- wait_to_complete);
- if (slave_eth_dev->data->dev_link.link_status == 1) {
- link_up = 1;
- break;
- }
- }
-
- bonded_eth_dev->data->dev_link.link_status = link_up;
- }
-
- return 0;
-}
-
-static void
-bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
-{
- struct bond_dev_private *internals = dev->data->dev_private;
- struct rte_eth_stats slave_stats;
-
- int i;
-
- /* clear bonded stats before populating from slaves */
- memset(stats, 0, sizeof(*stats));
-
- for (i = 0; i < internals->slave_count; i++) {
- rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
-
- stats->ipackets += slave_stats.ipackets;
- stats->opackets += slave_stats.opackets;
- stats->ibytes += slave_stats.ibytes;
- stats->obytes += slave_stats.obytes;
- stats->ierrors += slave_stats.ierrors;
- stats->oerrors += slave_stats.oerrors;
- stats->imcasts += slave_stats.imcasts;
- stats->rx_nombuf += slave_stats.rx_nombuf;
- stats->fdirmatch += slave_stats.fdirmatch;
- stats->fdirmiss += slave_stats.fdirmiss;
- stats->tx_pause_xon += slave_stats.tx_pause_xon;
- stats->rx_pause_xon += slave_stats.rx_pause_xon;
- stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
- stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
- }
-}
-
-static void
-bond_ethdev_stats_reset(struct rte_eth_dev *dev)
-{
- struct bond_dev_private *internals = dev->data->dev_private;
- int i;
-
- for (i = 0; i < internals->slave_count; i++)
- rte_eth_stats_reset(internals->slaves[i].port_id);
-}
-
-static void
-bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
-{
- struct bond_dev_private *internals = eth_dev->data->dev_private;
- int i;
-
- internals->promiscuous_en = 1;
-
- switch (internals->mode) {
- /* Promiscuous mode is propagated to all slaves */
- case BONDING_MODE_ROUND_ROBIN:
- case BONDING_MODE_BALANCE:
- case BONDING_MODE_BROADCAST:
- for (i = 0; i < internals->slave_count; i++)
- rte_eth_promiscuous_enable(internals->slaves[i].port_id);
- break;
- /* In mode4 promiscus mode is managed when slave is added/removed */
- case BONDING_MODE_8023AD:
- break;
- /* Promiscuous mode is propagated only to primary slave */
- case BONDING_MODE_ACTIVE_BACKUP:
- case BONDING_MODE_TLB:
- case BONDING_MODE_ALB:
- default:
- rte_eth_promiscuous_enable(internals->current_primary_port);
- }
-}
-
-static void
-bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
-{
- struct bond_dev_private *internals = dev->data->dev_private;
- int i;
-
- internals->promiscuous_en = 0;
-
- switch (internals->mode) {
- /* Promiscuous mode is propagated to all slaves */
- case BONDING_MODE_ROUND_ROBIN:
- case BONDING_MODE_BALANCE:
- case BONDING_MODE_BROADCAST:
- for (i = 0; i < internals->slave_count; i++)
- rte_eth_promiscuous_disable(internals->slaves[i].port_id);
- break;
- /* In mode4 promiscus mode is set managed when slave is added/removed */
- case BONDING_MODE_8023AD:
- break;
- /* Promiscuous mode is propagated only to primary slave */
- case BONDING_MODE_ACTIVE_BACKUP:
- case BONDING_MODE_TLB:
- case BONDING_MODE_ALB:
- default:
- rte_eth_promiscuous_disable(internals->current_primary_port);
- }
-}
-
-static void
-bond_ethdev_delayed_lsc_propagation(void *arg)
-{
- if (arg == NULL)
- return;
-
- _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
- RTE_ETH_EVENT_INTR_LSC);
-}
-
-void
-bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
- void *param)
-{
- struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
- struct bond_dev_private *internals;
- struct rte_eth_link link;
-
- int i, valid_slave = 0;
- uint8_t active_pos;
- uint8_t lsc_flag = 0;
-
- if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
- return;
-
- bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
- slave_eth_dev = &rte_eth_devices[port_id];
-
- if (valid_bonded_ethdev(bonded_eth_dev))
- return;
-
- internals = bonded_eth_dev->data->dev_private;
-
- /* If the device isn't started don't handle interrupts */
- if (!bonded_eth_dev->data->dev_started)
- return;
-
- /* verify that port_id is a valid slave of bonded port */
- for (i = 0; i < internals->slave_count; i++) {
- if (internals->slaves[i].port_id == port_id) {
- valid_slave = 1;
- break;
- }
- }
-
- if (!valid_slave)
- return;
-
- /* Search for port in active port list */
- active_pos = find_slave_by_id(internals->active_slaves,
- internals->active_slave_count, port_id);
-
- rte_eth_link_get_nowait(port_id, &link);
- if (link.link_status) {
- if (active_pos < internals->active_slave_count)
- return;
-
- /* if no active slave ports then set this port to be primary port */
- if (internals->active_slave_count < 1) {
- /* If first active slave, then change link status */
- bonded_eth_dev->data->dev_link.link_status = 1;
- internals->current_primary_port = port_id;
- lsc_flag = 1;
-
- mac_address_slaves_update(bonded_eth_dev);
-
- /* Inherit eth dev link properties from first active slave */
- link_properties_set(bonded_eth_dev,
- &(slave_eth_dev->data->dev_link));
- }
-
- activate_slave(bonded_eth_dev, port_id);
-
- /* If user has defined the primary port then default to using it */
- if (internals->user_defined_primary_port &&
- internals->primary_port == port_id)
- bond_ethdev_primary_set(internals, port_id);
- } else {
- if (active_pos == internals->active_slave_count)
- return;
-
- /* Remove from active slave list */
- deactivate_slave(bonded_eth_dev, port_id);
-
- /* No active slaves, change link status to down and reset other
- * link properties */
- if (internals->active_slave_count < 1) {
- lsc_flag = 1;
- bonded_eth_dev->data->dev_link.link_status = 0;
-
- link_properties_reset(bonded_eth_dev);
- }
-
- /* Update primary id, take first active slave from list or if none
- * available set to -1 */
- if (port_id == internals->current_primary_port) {
- if (internals->active_slave_count > 0)
- bond_ethdev_primary_set(internals,
- internals->active_slaves[0]);
- else
- internals->current_primary_port = internals->primary_port;
- }
- }
-
- if (lsc_flag) {
- /* Cancel any possible outstanding interrupts if delays are enabled */
- if (internals->link_up_delay_ms > 0 ||
- internals->link_down_delay_ms > 0)
- rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
- bonded_eth_dev);
-
- if (bonded_eth_dev->data->dev_link.link_status) {
- if (internals->link_up_delay_ms > 0)
- rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
- bond_ethdev_delayed_lsc_propagation,
- (void *)bonded_eth_dev);
- else
- _rte_eth_dev_callback_process(bonded_eth_dev,
- RTE_ETH_EVENT_INTR_LSC);
-
- } else {
- if (internals->link_down_delay_ms > 0)
- rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
- bond_ethdev_delayed_lsc_propagation,
- (void *)bonded_eth_dev);
- else
- _rte_eth_dev_callback_process(bonded_eth_dev,
- RTE_ETH_EVENT_INTR_LSC);
- }
- }
-}
-
-struct eth_dev_ops default_dev_ops = {
- .dev_start = bond_ethdev_start,
- .dev_stop = bond_ethdev_stop,
- .dev_close = bond_ethdev_close,
- .dev_configure = bond_ethdev_configure,
- .dev_infos_get = bond_ethdev_info,
- .rx_queue_setup = bond_ethdev_rx_queue_setup,
- .tx_queue_setup = bond_ethdev_tx_queue_setup,
- .rx_queue_release = bond_ethdev_rx_queue_release,
- .tx_queue_release = bond_ethdev_tx_queue_release,
- .link_update = bond_ethdev_link_update,
- .stats_get = bond_ethdev_stats_get,
- .stats_reset = bond_ethdev_stats_reset,
- .promiscuous_enable = bond_ethdev_promiscuous_enable,
- .promiscuous_disable = bond_ethdev_promiscuous_disable
-};
-
-static int
-bond_init(const char *name, const char *params)
-{
- struct bond_dev_private *internals;
- struct rte_kvargs *kvlist;
- uint8_t bonding_mode, socket_id;
- int arg_count, port_id;
-
- RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
-
- kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
- if (kvlist == NULL)
- return -1;
-
- /* Parse link bonding mode */
- if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
- if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
- &bond_ethdev_parse_slave_mode_kvarg,
- &bonding_mode) != 0) {
- RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
- name);
- goto parse_error;
- }
- } else {
- RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
- "device %s\n", name);
- goto parse_error;
- }
-
- /* Parse socket id to create bonding device on */
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
- if (arg_count == 1) {
- if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
- &bond_ethdev_parse_socket_id_kvarg, &socket_id)
- != 0) {
- RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
- "bonded device %s\n", name);
- goto parse_error;
- }
- } else if (arg_count > 1) {
- RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
- "bonded device %s\n", name);
- goto parse_error;
- } else {
- socket_id = rte_socket_id();
- }
-
- /* Create link bonding eth device */
- port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
- if (port_id < 0) {
- RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
- "socket %u.\n", name, bonding_mode, socket_id);
- goto parse_error;
- }
- internals = rte_eth_devices[port_id].data->dev_private;
- internals->kvlist = kvlist;
-
- RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
- "socket %u.\n", name, port_id, bonding_mode, socket_id);
- return 0;
-
-parse_error:
- rte_kvargs_free(kvlist);
-
- return -1;
-}
-
-/* this part will resolve the slave portids after all the other pdev and vdev
- * have been allocated */
-static int
-bond_ethdev_configure(struct rte_eth_dev *dev)
-{
- char *name = dev->data->name;
- struct bond_dev_private *internals = dev->data->dev_private;
- struct rte_kvargs *kvlist = internals->kvlist;
- int arg_count;
- uint8_t port_id = dev - rte_eth_devices;
-
- /*
- * if no kvlist, it means that this bonded device has been created
- * through the bonding api.
- */
- if (!kvlist)
- return 0;
-
- /* Parse MAC address for bonded device */
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
- if (arg_count == 1) {
- struct ether_addr bond_mac;
-
- if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
- &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
- RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
- name);
- return -1;
- }
-
- /* Set MAC address */
- if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to set mac address on bonded device %s\n",
- name);
- return -1;
- }
- } else if (arg_count > 1) {
- RTE_LOG(ERR, EAL,
- "MAC address can be specified only once for bonded device %s\n",
- name);
- return -1;
- }
-
- /* Parse/set balance mode transmit policy */
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
- if (arg_count == 1) {
- uint8_t xmit_policy;
-
- if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
- &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
- 0) {
- RTE_LOG(INFO, EAL,
- "Invalid xmit policy specified for bonded device %s\n",
- name);
- return -1;
- }
-
- /* Set balance mode transmit policy*/
- if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to set balance xmit policy on bonded device %s\n",
- name);
- return -1;
- }
- } else if (arg_count > 1) {
- RTE_LOG(ERR, EAL,
- "Transmit policy can be specified only once for bonded device"
- " %s\n", name);
- return -1;
- }
-
- /* Parse/add slave ports to bonded device */
- if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
- struct bond_ethdev_slave_ports slave_ports;
- unsigned i;
-
- memset(&slave_ports, 0, sizeof(slave_ports));
-
- if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
- &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to parse slave ports for bonded device %s\n",
- name);
- return -1;
- }
-
- for (i = 0; i < slave_ports.slave_count; i++) {
- if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to add port %d as slave to bonded device %s\n",
- slave_ports.slaves[i], name);
- }
- }
-
- } else {
- RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
- return -1;
- }
-
- /* Parse/set primary slave port id*/
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
- if (arg_count == 1) {
- uint8_t primary_slave_port_id;
-
- if (rte_kvargs_process(kvlist,
- PMD_BOND_PRIMARY_SLAVE_KVARG,
- &bond_ethdev_parse_primary_slave_port_id_kvarg,
- &primary_slave_port_id) < 0) {
- RTE_LOG(INFO, EAL,
- "Invalid primary slave port id specified for bonded device"
- " %s\n", name);
- return -1;
- }
-
- /* Set balance mode transmit policy*/
- if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
- != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to set primary slave port %d on bonded device %s\n",
- primary_slave_port_id, name);
- return -1;
- }
- } else if (arg_count > 1) {
- RTE_LOG(INFO, EAL,
- "Primary slave can be specified only once for bonded device"
- " %s\n", name);
- return -1;
- }
-
- /* Parse link status monitor polling interval */
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
- if (arg_count == 1) {
- uint32_t lsc_poll_interval_ms;
-
- if (rte_kvargs_process(kvlist,
- PMD_BOND_LSC_POLL_PERIOD_KVARG,
- &bond_ethdev_parse_time_ms_kvarg,
- &lsc_poll_interval_ms) < 0) {
- RTE_LOG(INFO, EAL,
- "Invalid lsc polling interval value specified for bonded"
- " device %s\n", name);
- return -1;
- }
-
- if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
- != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to set lsc monitor polling interval (%u ms) on"
- " bonded device %s\n", lsc_poll_interval_ms, name);
- return -1;
- }
- } else if (arg_count > 1) {
- RTE_LOG(INFO, EAL,
- "LSC polling interval can be specified only once for bonded"
- " device %s\n", name);
- return -1;
- }
-
- /* Parse link up interrupt propagation delay */
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
- if (arg_count == 1) {
- uint32_t link_up_delay_ms;
-
- if (rte_kvargs_process(kvlist,
- PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
- &bond_ethdev_parse_time_ms_kvarg,
- &link_up_delay_ms) < 0) {
- RTE_LOG(INFO, EAL,
- "Invalid link up propagation delay value specified for"
- " bonded device %s\n", name);
- return -1;
- }
-
- /* Set balance mode transmit policy*/
- if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
- != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to set link up propagation delay (%u ms) on bonded"
- " device %s\n", link_up_delay_ms, name);
- return -1;
- }
- } else if (arg_count > 1) {
- RTE_LOG(INFO, EAL,
- "Link up propagation delay can be specified only once for"
- " bonded device %s\n", name);
- return -1;
- }
-
- /* Parse link down interrupt propagation delay */
- arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
- if (arg_count == 1) {
- uint32_t link_down_delay_ms;
-
- if (rte_kvargs_process(kvlist,
- PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
- &bond_ethdev_parse_time_ms_kvarg,
- &link_down_delay_ms) < 0) {
- RTE_LOG(INFO, EAL,
- "Invalid link down propagation delay value specified for"
- " bonded device %s\n", name);
- return -1;
- }
-
- /* Set balance mode transmit policy*/
- if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
- != 0) {
- RTE_LOG(ERR, EAL,
- "Failed to set link down propagation delay (%u ms) on"
- " bonded device %s\n", link_down_delay_ms, name);
- return -1;
- }
- } else if (arg_count > 1) {
- RTE_LOG(INFO, EAL,
- "Link down propagation delay can be specified only once for"
- " bonded device %s\n", name);
- return -1;
- }
-
- return 0;
-}
-
-static struct rte_driver bond_drv = {
- .name = "eth_bond",
- .type = PMD_VDEV,
- .init = bond_init,
-};
-
-PMD_REGISTER_DRIVER(bond_drv);
+++ /dev/null
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_ETH_BOND_PRIVATE_H_
-#define _RTE_ETH_BOND_PRIVATE_H_
-
-#include <rte_ethdev.h>
-#include <rte_spinlock.h>
-
-#include "rte_eth_bond.h"
-#include "rte_eth_bond_8023ad_private.h"
-#include "rte_eth_bond_alb.h"
-
-#define PMD_BOND_SLAVE_PORT_KVARG ("slave")
-#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
-#define PMD_BOND_MODE_KVARG ("mode")
-#define PMD_BOND_XMIT_POLICY_KVARG ("xmit_policy")
-#define PMD_BOND_SOCKET_ID_KVARG ("socket_id")
-#define PMD_BOND_MAC_ADDR_KVARG ("mac")
-#define PMD_BOND_LSC_POLL_PERIOD_KVARG ("lsc_poll_period_ms")
-#define PMD_BOND_LINK_UP_PROP_DELAY_KVARG ("up_delay")
-#define PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG ("down_delay")
-
-#define PMD_BOND_XMIT_POLICY_LAYER2_KVARG ("l2")
-#define PMD_BOND_XMIT_POLICY_LAYER23_KVARG ("l23")
-#define PMD_BOND_XMIT_POLICY_LAYER34_KVARG ("l34")
-
-#define RTE_BOND_LOG(lvl, msg, ...) \
- RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__)
-
-#define BONDING_MODE_INVALID 0xFF
-
-extern const char *pmd_bond_init_valid_arguments[];
-
-extern const char *driver_name;
-
-/** Port Queue Mapping Structure */
-struct bond_rx_queue {
- uint16_t queue_id;
- /**< Queue Id */
- struct bond_dev_private *dev_private;
- /**< Reference to eth_dev private structure */
- uint16_t nb_rx_desc;
- /**< Number of RX descriptors available for the queue */
- struct rte_eth_rxconf rx_conf;
- /**< Copy of RX configuration structure for queue */
- struct rte_mempool *mb_pool;
- /**< Reference to mbuf pool to use for RX queue */
-};
-
-struct bond_tx_queue {
- uint16_t queue_id;
- /**< Queue Id */
- struct bond_dev_private *dev_private;
- /**< Reference to dev private structure */
- uint16_t nb_tx_desc;
- /**< Number of TX descriptors available for the queue */
- struct rte_eth_txconf tx_conf;
- /**< Copy of TX configuration structure for queue */
-};
-
-/** Bonded slave devices structure */
-struct bond_ethdev_slave_ports {
- uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
- uint8_t slave_count; /**< Number of slaves */
-};
-
-struct bond_slave_details {
- uint8_t port_id;
-
- uint8_t link_status_poll_enabled;
- uint8_t link_status_wait_to_complete;
- uint8_t last_link_status;
- /**< Port Id of slave eth_dev */
- struct ether_addr persisted_mac_addr;
-};
-
-
-typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
-
-/** Link Bonding PMD device private configuration Structure */
-struct bond_dev_private {
- uint8_t port_id; /**< Port Id of Bonded Port */
- uint8_t mode; /**< Link Bonding Mode */
-
- rte_spinlock_t lock;
-
- uint8_t primary_port; /**< Primary Slave Port */
- uint8_t current_primary_port; /**< Primary Slave Port */
- uint8_t user_defined_primary_port;
- /**< Flag for whether primary port is user defined or not */
-
- uint8_t balance_xmit_policy;
- /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */
- xmit_hash_t xmit_hash;
- /**< Transmit policy hash function */
-
- uint8_t user_defined_mac;
- /**< Flag for whether MAC address is user defined or not */
- uint8_t promiscuous_en;
- /**< Enabled/disable promiscuous mode on bonding device */
- uint8_t link_props_set;
- /**< flag to denote if the link properties are set */
-
- uint8_t link_status_polling_enabled;
- uint32_t link_status_polling_interval_ms;
-
- uint32_t link_down_delay_ms;
- uint32_t link_up_delay_ms;
-
- uint16_t nb_rx_queues; /**< Total number of rx queues */
- uint16_t nb_tx_queues; /**< Total number of tx queues*/
-
- uint8_t active_slave_count; /**< Number of active slaves */
- uint8_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */
-
- uint8_t slave_count; /**< Number of bonded slaves */
- struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
- /**< Arary of bonded slaves details */
-
- struct mode8023ad_private mode4;
- uint8_t tlb_slaves_order[RTE_MAX_ETHPORTS]; /* TLB active slaves send order */
- struct mode_alb_private mode6;
-
- uint32_t rx_offload_capa; /** Rx offload capability */
- uint32_t tx_offload_capa; /** Tx offload capability */
-
- struct rte_kvargs *kvlist;
- uint8_t slave_update_idx;
-};
-
-extern struct eth_dev_ops default_dev_ops;
-
-int
-valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
-
-/* Search given slave array to find possition of given id.
- * Return slave pos or slaves_count if not found. */
-static inline uint8_t
-find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) {
-
- uint8_t pos;
- for (pos = 0; pos < slaves_count; pos++) {
- if (slave_id == slaves[pos])
- break;
- }
-
- return pos;
-}
-
-int
-valid_port_id(uint8_t port_id);
-
-int
-valid_bonded_port_id(uint8_t port_id);
-
-int
-valid_slave_port_id(uint8_t port_id);
-
-void
-deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
-
-void
-activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
-
-void
-link_properties_set(struct rte_eth_dev *bonded_eth_dev,
- struct rte_eth_link *slave_dev_link);
-void
-link_properties_reset(struct rte_eth_dev *bonded_eth_dev);
-
-int
-link_properties_valid(struct rte_eth_link *bonded_dev_link,
- struct rte_eth_link *slave_dev_link);
-
-int
-mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
-
-int
-mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
-
-int
-mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
-
-uint8_t
-number_of_sockets(void);
-
-int
-bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode);
-
-int
-slave_configure(struct rte_eth_dev *bonded_eth_dev,
- struct rte_eth_dev *slave_eth_dev);
-
-void
-slave_remove(struct bond_dev_private *internals,
- struct rte_eth_dev *slave_eth_dev);
-
-void
-slave_add(struct bond_dev_private *internals,
- struct rte_eth_dev *slave_eth_dev);
-
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
-
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
-
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
-
-void
-bond_ethdev_primary_set(struct bond_dev_private *internals,
- uint8_t slave_port_id);
-
-void
-bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
- void *param);
-
-int
-bond_ethdev_parse_slave_port_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-int
-bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-int
-bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-int
-bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-int
-bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-int
-bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-int
-bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused,
- const char *value, void *extra_args);
-
-void
-bond_tlb_disable(struct bond_dev_private *internals);
-
-void
-bond_tlb_enable(struct bond_dev_private *internals);
-
-void
-bond_tlb_activate_slave(struct bond_dev_private *internals);
-
-#endif
+++ /dev/null
-DPDK_2.0 {
- global:
-
- rte_eth_bond_8023ad_conf_get;
- rte_eth_bond_8023ad_setup;
- rte_eth_bond_active_slaves_get;
- rte_eth_bond_create;
- rte_eth_bond_link_monitoring_set;
- rte_eth_bond_mac_address_reset;
- rte_eth_bond_mac_address_set;
- rte_eth_bond_mode_get;
- rte_eth_bond_mode_set;
- rte_eth_bond_primary_get;
- rte_eth_bond_primary_set;
- rte_eth_bond_slave_add;
- rte_eth_bond_slave_remove;
- rte_eth_bond_slaves_get;
- rte_eth_bond_xmit_policy_get;
- rte_eth_bond_xmit_policy_set;
-
- local: *;
-};