/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
-#include <rte_dev.h>
+#include <rte_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
+#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
bd_rx_q->queue_id, bufs, nb_pkts);
}
+static inline uint8_t
+is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
+{
+ const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
+
+ return !vlan_tci && (ethertype == ether_type_slow_be &&
+ (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
+}
+
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
uint16_t num_rx_total = 0; /* Total number of received packets */
uint8_t slaves[RTE_MAX_ETHPORTS];
- uint8_t slave_count;
+ uint8_t slave_count, idx;
uint8_t collecting; /* current slave collecting status */
const uint8_t promisc = internals->promiscuous_en;
uint8_t i, j, k;
+ uint8_t subtype;
rte_eth_macaddr_get(internals->port_id, &bond_mac);
/* Copy slave list to protect against slave up/down changes during tx
memcpy(slaves, internals->active_slaves,
sizeof(internals->active_slaves[0]) * slave_count);
+ idx = internals->active_slave;
+ if (idx >= slave_count) {
+ internals->active_slave = 0;
+ idx = 0;
+ }
for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
j = num_rx_total;
- collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
+ collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
+ COLLECTING);
/* Read packets from this slave */
- num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+ num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
&bufs[num_rx_total], nb_pkts - num_rx_total);
for (k = j; k < 2 && k < num_rx_total; k++)
rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+ subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
+
/* Remove packet from array if it is slow packet or slave is not
* in collecting state or bondign interface is not in promiscus
* mode and packet address does not match. */
- if (unlikely(hdr->ether_type == ether_type_slow_be ||
+ if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
!collecting || (!promisc &&
!is_multicast_ether_addr(&hdr->d_addr) &&
!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
if (hdr->ether_type == ether_type_slow_be) {
- bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
- bufs[j]);
+ bond_mode_8023ad_handle_slow_pkt(
+ internals, slaves[idx], bufs[j]);
} else
rte_pktmbuf_free(bufs[j]);
} else
j++;
}
+ if (unlikely(++idx == slave_count))
+ idx = 0;
}
+ internals->active_slave = idx;
return num_rx_total;
}
uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
/*
- * We create separate transmit buffers for update packets as they wont be
- * counted in num_tx_total.
+ * We create separate transmit buffers for update packets as they won't
+ * be counted in num_tx_total.
*/
struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
}
num_tx_total += num_send;
- num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
}
return num_tx_total;
struct port *port = &mode_8023ad_ports[slaves[i]];
slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
- slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
+ slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
+ NULL);
slave_nb_pkts[i] = slave_slow_nb_pkts[i];
for (j = 0; j < slave_slow_nb_pkts[i]; j++)
struct bond_rx_queue *bd_rx_q;
struct bond_tx_queue *bd_tx_q;
- uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
- uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
int errval;
uint16_t q_id;
bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
}
+ slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
+ bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
+
/* Configure device */
errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
bonded_eth_dev->data->nb_rx_queues,
}
/* Setup Rx Queues */
- /* Use existing queues, if any */
- for (q_id = old_nb_rx_queues;
- q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
+ for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
}
/* Setup Tx Queues */
- /* Use existing queues, if any */
- for (q_id = old_nb_tx_queues;
- q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
+ for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
}
/* If lsc interrupt is set, check initial slave's link status */
- if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
+ if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+ slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
+ }
return 0;
}
(internals->slave_count - i - 1));
internals->slave_count--;
+
+ /* force reconfiguration of slave interfaces */
+ _rte_eth_dev_reset(slave_eth_dev);
}
static void
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
+ struct bond_dev_private *internals = dev->data->dev_private;
+ uint8_t bond_port_id = internals->port_id;
+ int skipped = 0;
+
+ RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->data->name);
+ while (internals->slave_count != skipped) {
+ uint8_t port_id = internals->slaves[skipped].port_id;
+
+ rte_eth_dev_stop(port_id);
+
+ if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to remove port %d from bonded device "
+ "%s\n", port_id, dev->data->name);
+ skipped++;
+ }
+ }
bond_ethdev_free_queues(dev);
+ rte_bitmap_reset(internals->vlan_filter_bmp);
}
/* forward declaration */
dev_info->max_mac_addrs = 1;
- dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
- internals->candidate_max_rx_pktlen : 2048;
+ dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen
+ ? internals->candidate_max_rx_pktlen
+ : ETHER_MAX_JUMBO_FRAME_LEN;
dev_info->max_rx_queues = (uint16_t)128;
dev_info->max_tx_queues = (uint16_t)512;
dev_info->min_rx_bufsize = 0;
- dev_info->pci_dev = NULL;
dev_info->rx_offload_capa = internals->rx_offload_capa;
dev_info->tx_offload_capa = internals->tx_offload_capa;
dev_info->reta_size = internals->reta_size;
}
+static int
+bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ int res;
+ uint8_t i;
+ struct bond_dev_private *internals = dev->data->dev_private;
+
+ /* don't do this while a slave is being added */
+ rte_spinlock_lock(&internals->lock);
+
+ if (on)
+ rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
+ else
+ rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
+
+ for (i = 0; i < internals->slave_count; i++) {
+ uint8_t port_id = internals->slaves[i].port_id;
+
+ res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
+ if (res == ENOTSUP)
+ RTE_LOG(WARNING, PMD,
+ "Setting VLAN filter on slave port %u not supported.\n",
+ port_id);
+ }
+
+ rte_spinlock_unlock(&internals->lock);
+ return 0;
+}
+
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
return;
_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
- RTE_ETH_EVENT_INTR_LSC);
+ RTE_ETH_EVENT_INTR_LSC, NULL);
}
void
/* Inherit eth dev link properties from first active slave */
link_properties_set(bonded_eth_dev,
&(slave_eth_dev->data->dev_link));
+ } else {
+ if (link_properties_valid(
+ &bonded_eth_dev->data->dev_link, &link) != 0) {
+ slave_eth_dev->data->dev_flags &=
+ (~RTE_ETH_DEV_BONDED_SLAVE);
+ RTE_LOG(ERR, PMD,
+ "port %u invalid speed/duplex\n",
+ port_id);
+ return;
+ }
}
activate_slave(bonded_eth_dev, port_id);
(void *)bonded_eth_dev);
else
_rte_eth_dev_callback_process(bonded_eth_dev,
- RTE_ETH_EVENT_INTR_LSC);
+ RTE_ETH_EVENT_INTR_LSC, NULL);
} else {
if (internals->link_down_delay_ms > 0)
(void *)bonded_eth_dev);
else
_rte_eth_dev_callback_process(bonded_eth_dev,
- RTE_ETH_EVENT_INTR_LSC);
+ RTE_ETH_EVENT_INTR_LSC, NULL);
}
}
}
.dev_close = bond_ethdev_close,
.dev_configure = bond_ethdev_configure,
.dev_infos_get = bond_ethdev_info,
+ .vlan_filter_set = bond_ethdev_vlan_filter_set,
.rx_queue_setup = bond_ethdev_rx_queue_setup,
.tx_queue_setup = bond_ethdev_tx_queue_setup,
.rx_queue_release = bond_ethdev_rx_queue_release,
};
static int
-bond_init(const char *name, const char *params)
+bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
+ const char *name = rte_vdev_device_name(dev);
+ uint8_t socket_id = dev->device.numa_node;
+ struct bond_dev_private *internals = NULL;
+ struct rte_eth_dev *eth_dev = NULL;
+ uint32_t vlan_filter_bmp_size;
+
+ /* now do all data allocation - for eth_dev structure, dummy pci driver
+ * and internal (private) data
+ */
+
+ /* reserve an ethdev entry */
+ eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
+ if (eth_dev == NULL) {
+ RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
+ goto err;
+ }
+
+ internals = eth_dev->data->dev_private;
+ eth_dev->data->nb_rx_queues = (uint16_t)1;
+ eth_dev->data->nb_tx_queues = (uint16_t)1;
+
+ eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
+ socket_id);
+ if (eth_dev->data->mac_addrs == NULL) {
+ RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
+ goto err;
+ }
+
+ eth_dev->dev_ops = &default_dev_ops;
+ eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
+ RTE_ETH_DEV_DETACHABLE;
+
+ rte_spinlock_init(&internals->lock);
+
+ internals->port_id = eth_dev->data->port_id;
+ internals->mode = BONDING_MODE_INVALID;
+ internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
+ internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
+ internals->xmit_hash = xmit_l2_hash;
+ internals->user_defined_mac = 0;
+ internals->link_props_set = 0;
+
+ internals->link_status_polling_enabled = 0;
+
+ internals->link_status_polling_interval_ms =
+ DEFAULT_POLLING_INTERVAL_10_MS;
+ internals->link_down_delay_ms = 0;
+ internals->link_up_delay_ms = 0;
+
+ internals->slave_count = 0;
+ internals->active_slave_count = 0;
+ internals->rx_offload_capa = 0;
+ internals->tx_offload_capa = 0;
+ internals->candidate_max_rx_pktlen = 0;
+ internals->max_rx_pktlen = 0;
+
+ /* Initially allow to choose any offload type */
+ internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+
+ memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
+ memset(internals->slaves, 0, sizeof(internals->slaves));
+
+ /* Set mode 4 default configuration */
+ bond_mode_8023ad_setup(eth_dev, NULL);
+ if (bond_ethdev_mode_set(eth_dev, mode)) {
+ RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
+ eth_dev->data->port_id, mode);
+ goto err;
+ }
+
+ vlan_filter_bmp_size =
+ rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
+ internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
+ RTE_CACHE_LINE_SIZE);
+ if (internals->vlan_filter_bmpmem == NULL) {
+ RTE_BOND_LOG(ERR,
+ "Failed to allocate vlan bitmap for bonded device %u\n",
+ eth_dev->data->port_id);
+ goto err;
+ }
+
+ internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
+ internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
+ if (internals->vlan_filter_bmp == NULL) {
+ RTE_BOND_LOG(ERR,
+ "Failed to init vlan bitmap for bonded device %u\n",
+ eth_dev->data->port_id);
+ rte_free(internals->vlan_filter_bmpmem);
+ goto err;
+ }
+
+ return eth_dev->data->port_id;
+
+err:
+ rte_free(internals);
+ if (eth_dev != NULL) {
+ rte_free(eth_dev->data->mac_addrs);
+ rte_eth_dev_release_port(eth_dev);
+ }
+ return -1;
+}
+
+static int
+bond_probe(struct rte_vdev_device *dev)
+{
+ const char *name;
struct bond_dev_private *internals;
struct rte_kvargs *kvlist;
uint8_t bonding_mode, socket_id;
int arg_count, port_id;
+ if (!dev)
+ return -EINVAL;
+
+ name = rte_vdev_device_name(dev);
RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
- kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
+ kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
+ pmd_bond_init_valid_arguments);
if (kvlist == NULL)
return -1;
socket_id = rte_socket_id();
}
+ dev->device.numa_node = socket_id;
+
/* Create link bonding eth device */
- port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
+ port_id = bond_alloc(dev, bonding_mode);
if (port_id < 0) {
RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
"socket %u.\n", name, bonding_mode, socket_id);
}
static int
-bond_uninit(const char *name)
+bond_remove(struct rte_vdev_device *dev)
{
- int ret;
+ struct rte_eth_dev *eth_dev;
+ struct bond_dev_private *internals;
+ const char *name;
- if (name == NULL)
+ if (!dev)
return -EINVAL;
+ name = rte_vdev_device_name(dev);
RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
- /* free link bonding eth device */
- ret = rte_eth_bond_free(name);
- if (ret < 0)
- RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
+ /* now free all data allocation - for eth_dev structure,
+ * dummy pci driver and internal (private) data
+ */
+
+ /* find an ethdev entry */
+ eth_dev = rte_eth_dev_allocated(name);
+ if (eth_dev == NULL)
+ return -ENODEV;
+
+ RTE_ASSERT(eth_dev->device == &dev->device);
+
+ internals = eth_dev->data->dev_private;
+ if (internals->slave_count != 0)
+ return -EBUSY;
- return ret;
+ if (eth_dev->data->dev_started == 1) {
+ bond_ethdev_stop(eth_dev);
+ bond_ethdev_close(eth_dev);
+ }
+
+ eth_dev->dev_ops = NULL;
+ eth_dev->rx_pkt_burst = NULL;
+ eth_dev->tx_pkt_burst = NULL;
+
+ internals = eth_dev->data->dev_private;
+ rte_bitmap_free(internals->vlan_filter_bmp);
+ rte_free(internals->vlan_filter_bmpmem);
+ rte_free(eth_dev->data->dev_private);
+ rte_free(eth_dev->data->mac_addrs);
+
+ rte_eth_dev_release_port(eth_dev);
+
+ return 0;
}
/* this part will resolve the slave portids after all the other pdev and vdev
return 0;
}
-static struct rte_driver bond_drv = {
- .name = "eth_bond",
- .type = PMD_VDEV,
- .init = bond_init,
- .uninit = bond_uninit,
+struct rte_vdev_driver pmd_bond_drv = {
+ .probe = bond_probe,
+ .remove = bond_remove,
};
-PMD_REGISTER_DRIVER(bond_drv, bonding);
-
-DRIVER_REGISTER_PARAM_STRING(bonding, "slave=<ifc> primary=<ifc> mode=[0-4] "
-"xmit_policy=[l2 | l23 | l34] socket_id=<int> mac=<mac addr> "
-"lsc_poll_period_ms=<int> up_delay=<int> down_delay=<int>");
-
+RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
+RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
+
+RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
+ "slave=<ifc> "
+ "primary=<ifc> "
+ "mode=[0-6] "
+ "xmit_policy=[l2 | l23 | l34] "
+ "socket_id=<int> "
+ "mac=<mac addr> "
+ "lsc_poll_period_ms=<int> "
+ "up_delay=<int> "
+ "down_delay=<int>");