4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
41 #include "rte_eth_bond.h"
42 #include "rte_eth_bond_private.h"
43 #include "rte_eth_bond_8023ad_private.h"
/* Default link status polling interval, in milliseconds; stored in
 * link_status_polling_interval_ms when a bonded device is created. */
45 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
/*
 * Report whether eth_dev is driven by the link bonding PMD.
 *
 * Returns 0 when the driver name pointer of the device is
 * pmd_bond_driver_name and non-zero for any other non-NULL pointer. The
 * test compares pointer identity, not string contents.
 *
 * NOTE(review): the statement taken when the name is NULL is not visible
 * here; confirm its return value. eth_dev and eth_dev->driver are
 * dereferenced without a NULL check.
 */
48 valid_bonded_ethdev(const struct rte_eth_dev *eth_dev)
50 /* Check valid pointer */
51 if (eth_dev->driver->pci_drv.name == NULL)
54 /* return 0 if driver name matches */
55 return eth_dev->driver->pci_drv.name != pmd_bond_driver_name;
/*
 * Check that port_id is a valid ethdev port that belongs to the bonding PMD.
 *
 * Ports accepted by rte_eth_dev_is_valid_port() are passed to
 * valid_bonded_ethdev(), whose result (0 for a bonded device) is returned.
 * NOTE(review): the return for an invalid port id is not visible here.
 */
59 valid_bonded_port_id(uint8_t port_id)
61 if (!rte_eth_dev_is_valid_port(port_id))
64 return valid_bonded_ethdev(&rte_eth_devices[port_id]);
/*
 * Check that port_id may be used as a slave: it has to be a valid ethdev
 * port and must not itself be a bonded device. Callers in this file treat
 * a non-zero result as a rejection.
 *
 * NOTE(review): the statements guarded by the two tests below, and the
 * final return, are not visible here; confirm the exact return values.
 */
68 valid_slave_port_id(uint8_t port_id)
70 /* Verify that port id's are valid */
71 if (!rte_eth_dev_is_valid_port(port_id))
74 /* Verify that port_id refers to a non bonded port */
/* valid_bonded_ethdev() yields 0 for a bonded device, so this branch is
 * taken when the candidate slave is itself a bonded port. */
75 if (!valid_bonded_ethdev(&rte_eth_devices[port_id]))
/*
 * Append port_id to the active slave list of the bonded device eth_dev and
 * run the mode specific activation hooks.
 *
 * eth_dev: bonded device; its dev_private is the bond_dev_private state.
 * port_id: port id of the slave being activated.
 */
82 activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
84 struct bond_dev_private *internals = eth_dev->data->dev_private;
/* Count before the append below; used as the slot in tlb_slaves_order. */
85 uint8_t active_count = internals->active_slave_count;
87 if (internals->mode == BONDING_MODE_8023AD)
88 bond_mode_8023ad_activate_slave(eth_dev, port_id);
90 if (internals->mode == BONDING_MODE_TLB
91 || internals->mode == BONDING_MODE_ALB) {
93 internals->tlb_slaves_order[active_count] = port_id;
/* Check for room in active_slaves before appending.
 * NOTE(review): the tlb_slaves_order store above happens before this
 * check; confirm that array cannot be overrun first. */
96 RTE_VERIFY(internals->active_slave_count <
97 (RTE_DIM(internals->active_slaves) - 1));
99 internals->active_slaves[internals->active_slave_count] = port_id;
100 internals->active_slave_count++;
/* Mode hooks that run once the new slave is in the active list. */
102 if (internals->mode == BONDING_MODE_TLB)
103 bond_tlb_activate_slave(internals);
104 if (internals->mode == BONDING_MODE_ALB)
105 bond_mode_alb_client_list_upd(eth_dev);
/*
 * Remove port_id from the active slave list of the bonded device eth_dev.
 *
 * Mode specific processing (802.3ad, TLB/ALB) is stopped before the list
 * is edited and, if the bonded device is started, enabled again afterwards.
 *
 * eth_dev: bonded device; its dev_private is the bond_dev_private state.
 * port_id: port id of the slave being deactivated.
 */
109 deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
112 struct bond_dev_private *internals = eth_dev->data->dev_private;
113 uint8_t active_count = internals->active_slave_count;
115 if (internals->mode == BONDING_MODE_8023AD) {
116 bond_mode_8023ad_stop(eth_dev);
117 bond_mode_8023ad_deactivate_slave(eth_dev, port_id);
118 } else if (internals->mode == BONDING_MODE_TLB
119 || internals->mode == BONDING_MODE_ALB)
120 bond_tlb_disable(internals);
/* Locate port_id in the active list; a position below active_count is
 * treated as a hit by the test that follows. */
122 slave_pos = find_slave_by_id(internals->active_slaves, active_count,
125 /* If slave was not at the end of the list
126 * shift active slaves up active array list */
127 if (slave_pos < active_count) {
129 memmove(internals->active_slaves + slave_pos,
130 internals->active_slaves + slave_pos + 1,
131 (active_count - slave_pos) *
132 sizeof(internals->active_slaves[0]));
/* NOTE(review): no decrement of active_count is visible between the test
 * above and the store below; confirm the count is reduced when the slave
 * is found, otherwise the memmove length and the stored count are off by
 * one. */
135 RTE_VERIFY(active_count < RTE_DIM(internals->active_slaves));
136 internals->active_slave_count = active_count;
/* Re-enable the mode specific processing stopped above. ALB enables TLB
 * as well and additionally calls bond_mode_alb_client_list_upd(). */
138 if (eth_dev->data->dev_started) {
139 if (internals->mode == BONDING_MODE_8023AD) {
140 bond_mode_8023ad_start(eth_dev);
141 } else if (internals->mode == BONDING_MODE_TLB) {
142 bond_tlb_enable(internals);
143 } else if (internals->mode == BONDING_MODE_ALB) {
144 bond_tlb_enable(internals);
145 bond_mode_alb_client_list_upd(eth_dev);
/*
 * Scan the physical memory segment layout and track the highest socket_id
 * seen. The scan stops after RTE_MAX_MEMSEG entries or at the first
 * segment whose addr is NULL.
 *
 * NOTE(review): the declarations of i and sockets and the return statement
 * are not visible here; the trailing comment indicates the result is the
 * highest socket_id plus one.
 */
151 number_of_sockets(void)
155 const struct rte_memseg *ms = rte_eal_get_physmem_layout();
157 for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
158 if (sockets < ms[i].socket_id)
159 sockets = ms[i].socket_id;
162 /* Number of sockets = maximum socket_id + 1 */
/* Driver name assigned to pci_drv.name of each bonded device at creation;
 * valid_bonded_ethdev() identifies bonded devices by comparing against
 * this pointer. */
166 const char *pmd_bond_driver_name = "Link Bonding PMD";
/*
 * Create a bonded ethdev called name, operating in the given bonding mode,
 * with its bookkeeping allocated on NUMA socket socket_id.
 *
 * The function allocates a placeholder PCI device, an eth driver (whose
 * embedded pci_drv is named pmd_bond_driver_name), a one-entry PCI id
 * table and the bond_dev_private state, reserves a virtual ethdev entry,
 * initialises the private state to its defaults and finally applies mode
 * through bond_ethdev_mode_set().
 *
 * name:      name for the new device, also used as the allocation tag.
 * mode:      bonding mode handed to bond_ethdev_mode_set().
 * socket_id: has to be lower than number_of_sockets().
 *
 * Returns the port id of the new bonded device on success.
 * NOTE(review): the failure paths are only partly visible here (a single
 * rte_free() of pci_id_table); confirm that every allocation is released
 * and which value is returned on error.
 */
169 rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
171 struct rte_pci_device *pci_dev = NULL;
172 struct bond_dev_private *internals = NULL;
173 struct rte_eth_dev *eth_dev = NULL;
174 struct eth_driver *eth_drv = NULL;
175 struct rte_pci_driver *pci_drv = NULL;
176 struct rte_pci_id *pci_id_table = NULL;
177 /* now do all data allocation - for eth_dev structure, dummy pci driver
178 * and internal (private) data
182 RTE_BOND_LOG(ERR, "Invalid name specified");
186 if (socket_id >= number_of_sockets()) {
188 "Invalid socket id specified to create bonded device on.");
192 pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
193 if (pci_dev == NULL) {
194 RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket");
198 eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id);
199 if (eth_drv == NULL) {
200 RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket");
204 pci_drv = &eth_drv->pci_drv;
206 pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id);
207 if (pci_id_table == NULL) {
208 RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket");
211 pci_id_table->device_id = PCI_ANY_ID;
212 pci_id_table->subsystem_device_id = PCI_ANY_ID;
213 pci_id_table->vendor_id = PCI_ANY_ID;
214 pci_id_table->subsystem_vendor_id = PCI_ANY_ID;
216 pci_drv->id_table = pci_id_table;
217 pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC;
219 internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
220 if (internals == NULL) {
221 RTE_BOND_LOG(ERR, "Unable to malloc internals on socket");
225 /* reserve an ethdev entry */
226 eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
227 if (eth_dev == NULL) {
228 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
/* Wire the placeholder PCI device and driver to the new ethdev. */
232 pci_dev->numa_node = socket_id;
233 pci_drv->name = pmd_bond_driver_name;
234 pci_dev->driver = pci_drv;
236 eth_dev->driver = eth_drv;
237 eth_dev->data->dev_private = internals;
238 eth_dev->data->nb_rx_queues = (uint16_t)1;
239 eth_dev->data->nb_tx_queues = (uint16_t)1;
241 TAILQ_INIT(&(eth_dev->link_intr_cbs));
243 eth_dev->data->dev_link.link_status = 0;
/* NOTE(review): no NULL check of this allocation is visible here; confirm
 * that a failed allocation is handled before mac_addrs is used. */
245 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
248 eth_dev->data->dev_started = 0;
249 eth_dev->data->promiscuous = 0;
250 eth_dev->data->scattered_rx = 0;
251 eth_dev->data->all_multicast = 0;
253 eth_dev->dev_ops = &default_dev_ops;
254 eth_dev->pci_dev = pci_dev;
/* Default bonding state. The mode stays BONDING_MODE_INVALID until
 * bond_ethdev_mode_set() is called further down. */
256 rte_spinlock_init(&internals->lock);
258 internals->port_id = eth_dev->data->port_id;
259 internals->mode = BONDING_MODE_INVALID;
260 internals->current_primary_port = 0;
261 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
262 internals->xmit_hash = xmit_l2_hash;
263 internals->user_defined_mac = 0;
264 internals->link_props_set = 0;
266 internals->link_status_polling_enabled = 0;
268 internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS;
269 internals->link_down_delay_ms = 0;
270 internals->link_up_delay_ms = 0;
272 internals->slave_count = 0;
273 internals->active_slave_count = 0;
274 internals->rx_offload_capa = 0;
275 internals->tx_offload_capa = 0;
277 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
278 memset(internals->slaves, 0, sizeof(internals->slaves));
280 /* Set mode 4 default configuration */
281 bond_mode_8023ad_setup(eth_dev, NULL);
282 if (bond_ethdev_mode_set(eth_dev, mode)) {
283 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
284 eth_dev->data->port_id, mode);
288 return eth_dev->data->port_id;
292 rte_free(pci_id_table);
/*
 * Attach slave_port_id to the bonded device bonded_port_id. The caller is
 * expected to hold the bonded device lock (rte_eth_bond_slave_add() takes
 * internals->lock around this call).
 *
 * Steps, in order:
 *  - reject slave_port_id unless valid_slave_port_id() returns 0;
 *  - walk every ethdev and, for each bonded one, check that slave_port_id
 *    is not already present in its slaves[] list;
 *  - record the slave with slave_add() and read its device info;
 *  - for the first slave: adopt its MAC (unless a user defined MAC is
 *    set), its link properties and its offload capabilities, and make it
 *    the primary port;
 *  - for later slaves: validate link properties when link_props_set and
 *    keep only the offload capabilities common to all slaves;
 *  - increment slave_count, call mac_address_slaves_update(), configure
 *    the slave if the bonded device is started, register the LSC callback
 *    and activate the slave if its link is up.
 *
 * NOTE(review): return statements and several closing braces are not
 * visible here, so the exact nesting and error codes need confirming.
 */
300 __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
302 struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
303 struct bond_dev_private *internals;
304 struct bond_dev_private *temp_internals;
305 struct rte_eth_link link_props;
306 struct rte_eth_dev_info dev_info;
310 if (valid_slave_port_id(slave_port_id) != 0)
313 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
314 internals = bonded_eth_dev->data->dev_private;
316 /* Verify that new slave device is not already a slave of another
318 for (i = rte_eth_dev_count()-1; i >= 0; i--) {
319 if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) {
320 temp_internals = rte_eth_devices[i].data->dev_private;
322 for (j = 0; j < temp_internals->slave_count; j++) {
323 /* Device already a slave of a bonded device */
324 if (temp_internals->slaves[j].port_id == slave_port_id) {
325 RTE_BOND_LOG(ERR, "Slave port %d is already a slave",
333 slave_eth_dev = &rte_eth_devices[slave_port_id];
335 /* Add slave details to bonded device */
336 slave_add(internals, slave_eth_dev);
338 rte_eth_dev_info_get(slave_port_id, &dev_info);
/* slave_count is only incremented further down, so a value below 1
 * here means this is the first slave being attached. */
340 if (internals->slave_count < 1) {
341 /* if MAC is not user defined then use MAC of first slave add to
343 if (!internals->user_defined_mac)
344 mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs);
346 /* Inherit eth dev link properties from first slave */
347 link_properties_set(bonded_eth_dev,
348 &(slave_eth_dev->data->dev_link));
350 /* Make primary slave */
351 internals->primary_port = slave_port_id;
353 /* Take the first dev's offload capabilities */
354 internals->rx_offload_capa = dev_info.rx_offload_capa;
355 internals->tx_offload_capa = dev_info.tx_offload_capa;
358 /* Check slave link properties are supported if props are set,
359 * all slaves must be the same */
360 if (internals->link_props_set) {
361 if (link_properties_valid(&(bonded_eth_dev->data->dev_link),
362 &(slave_eth_dev->data->dev_link))) {
364 "Slave port %d link speed/duplex not supported",
369 link_properties_set(bonded_eth_dev,
370 &(slave_eth_dev->data->dev_link));
/* Keep only the offload capabilities also offered by this slave. */
372 internals->rx_offload_capa &= dev_info.rx_offload_capa;
373 internals->tx_offload_capa &= dev_info.tx_offload_capa;
376 internals->slave_count++;
378 /* Update all slave devices MACs*/
379 mac_address_slaves_update(bonded_eth_dev);
381 if (bonded_eth_dev->data->dev_started) {
382 if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) {
383 RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d",
389 /* Register link status change callback with bonded device pointer as
391 rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
392 bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id);
394 /* If bonded device is started then we can add the slave to our active
396 if (bonded_eth_dev->data->dev_started) {
397 rte_eth_link_get_nowait(slave_port_id, &link_props);
399 if (link_props.link_status == 1)
400 activate_slave(bonded_eth_dev, slave_port_id);
/*
 * Public entry point for attaching a slave: checks that bonded_port_id is
 * a bonded device, then runs __eth_bond_slave_add_lock_free() while
 * holding the spinlock of that bonded device.
 *
 * NOTE(review): the declaration of retval and the return statements are
 * not visible here; presumably the result of the lock-free helper is
 * returned.
 */
407 rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
409 struct rte_eth_dev *bonded_eth_dev;
410 struct bond_dev_private *internals;
414 /* Verify that port id's are valid bonded and slave ports */
415 if (valid_bonded_port_id(bonded_port_id) != 0)
418 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
419 internals = bonded_eth_dev->data->dev_private;
421 rte_spinlock_lock(&internals->lock);
423 retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id);
425 rte_spinlock_unlock(&internals->lock);
/*
 * Detach slave_port_id from the bonded device bonded_port_id. Expected to
 * run with the bonded device lock held (rte_eth_bond_slave_remove() takes
 * internals->lock around this call).
 *
 * The slave is first taken out of the active list if present, then looked
 * up in slaves[]. The LSC callback is unregistered, the persisted MAC
 * address of the slave is restored and slave_remove() is called. Primary
 * port, link properties, bonded MAC and offload capabilities are then
 * adjusted for whatever slaves remain.
 *
 * NOTE(review): slave_idx is used twice, first as the active list position
 * and later as the slaves[] index; its assignment inside the lookup loop
 * and the return statements are not visible here.
 */
431 __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
433 struct rte_eth_dev *bonded_eth_dev;
434 struct bond_dev_private *internals;
438 if (valid_slave_port_id(slave_port_id) != 0)
441 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
442 internals = bonded_eth_dev->data->dev_private;
444 /* first remove from active slave list */
445 slave_idx = find_slave_by_id(internals->active_slaves,
446 internals->active_slave_count, slave_port_id);
448 if (slave_idx < internals->active_slave_count)
449 deactivate_slave(bonded_eth_dev, slave_port_id);
452 /* now find in slave list */
453 for (i = 0; i < internals->slave_count; i++)
454 if (internals->slaves[i].port_id == slave_port_id) {
460 RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d",
461 internals->slave_count);
465 /* Un-register link status change callback with bonded device pointer as
467 rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
468 bond_ethdev_lsc_event_callback,
469 &rte_eth_devices[bonded_port_id].data->port_id);
471 /* Restore original MAC address of slave device */
472 mac_address_set(&rte_eth_devices[slave_port_id],
473 &(internals->slaves[slave_idx].persisted_mac_addr));
475 slave_remove(internals, &rte_eth_devices[slave_port_id]);
/* NOTE(review): the last assignment in the group below writes
 * primary_port while the other two write current_primary_port; confirm
 * that this is intended. */
477 /* first slave in the active list will be the primary by default,
478 * otherwise use first device in list */
479 if (internals->current_primary_port == slave_port_id) {
480 if (internals->active_slave_count > 0)
481 internals->current_primary_port = internals->active_slaves[0];
482 else if (internals->slave_count > 0)
483 internals->current_primary_port = internals->slaves[0].port_id;
485 internals->primary_port = 0;
488 if (internals->active_slave_count < 1) {
489 /* reset device link properties as no slaves are active */
490 link_properties_reset(&rte_eth_devices[bonded_port_id]);
492 /* if no slaves are any longer attached to bonded device and MAC is not
493 * user defined then clear MAC of bonded device as it will be reset
494 * when a new slave is added */
495 if (internals->slave_count < 1 && !internals->user_defined_mac)
496 memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0,
497 sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs)));
/* With no slaves left there are no offload capabilities to advertise. */
499 if (internals->slave_count == 0) {
500 internals->rx_offload_capa = 0;
501 internals->tx_offload_capa = 0;
/*
 * Public entry point for detaching a slave: checks that bonded_port_id is
 * a bonded device, then runs __eth_bond_slave_remove_lock_free() while
 * holding the spinlock of that bonded device.
 *
 * NOTE(review): the declaration of retval and the return statements are
 * not visible here; presumably the result of the lock-free helper is
 * returned.
 */
507 rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
509 struct rte_eth_dev *bonded_eth_dev;
510 struct bond_dev_private *internals;
513 if (valid_bonded_port_id(bonded_port_id) != 0)
516 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
517 internals = bonded_eth_dev->data->dev_private;
519 rte_spinlock_lock(&internals->lock);
521 retval = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id);
523 rte_spinlock_unlock(&internals->lock);
/*
 * Set the bonding mode of bonded_port_id. After checking that the port is
 * a bonded device, the call is forwarded to bond_ethdev_mode_set() and its
 * result is returned.
 * NOTE(review): the return for a non-bonded port is not visible here.
 */
529 rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
531 if (valid_bonded_port_id(bonded_port_id) != 0)
534 return bond_ethdev_mode_set(&rte_eth_devices[bonded_port_id], mode);
/*
 * Return the bonding mode currently stored for bonded_port_id.
 * NOTE(review): the return for a non-bonded port is not visible here.
 */
538 rte_eth_bond_mode_get(uint8_t bonded_port_id)
540 struct bond_dev_private *internals;
542 if (valid_bonded_port_id(bonded_port_id) != 0)
545 internals = rte_eth_devices[bonded_port_id].data->dev_private;
547 return internals->mode;
/*
 * Select slave_port_id as the primary port of bonded_port_id.
 *
 * Both port ids are validated, the primary is flagged as user defined,
 * primary_port is set to slave_port_id and bond_ethdev_primary_set() is
 * called.
 *
 * NOTE(review): no check is visible that slave_port_id is actually
 * attached to this bonded device, and the device lock is not taken here;
 * confirm both are acceptable. Return statements are not visible.
 */
551 rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
553 struct bond_dev_private *internals;
555 if (valid_bonded_port_id(bonded_port_id) != 0)
558 if (valid_slave_port_id(slave_port_id) != 0)
561 internals = rte_eth_devices[bonded_port_id].data->dev_private;
563 internals->user_defined_primary_port = 1;
564 internals->primary_port = slave_port_id;
566 bond_ethdev_primary_set(internals, slave_port_id);
/*
 * Return current_primary_port of bonded_port_id.
 * NOTE(review): the values returned for a non-bonded port and for a bonded
 * device without slaves are not visible here.
 */
572 rte_eth_bond_primary_get(uint8_t bonded_port_id)
574 struct bond_dev_private *internals;
576 if (valid_bonded_port_id(bonded_port_id) != 0)
579 internals = rte_eth_devices[bonded_port_id].data->dev_private;
581 if (internals->slave_count < 1)
584 return internals->current_primary_port;
/*
 * Copy the port ids of all slaves of bonded_port_id into slaves[].
 *
 * slaves: caller supplied output array.
 * len:    capacity of slaves[]; a slave count above len takes an early
 *         exit instead of copying.
 *
 * Returns the number of slaves written.
 * NOTE(review): the early exit return values are not visible here.
 */
588 rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
590 struct bond_dev_private *internals;
593 if (valid_bonded_port_id(bonded_port_id) != 0)
599 internals = rte_eth_devices[bonded_port_id].data->dev_private;
601 if (internals->slave_count > len)
604 for (i = 0; i < internals->slave_count; i++)
605 slaves[i] = internals->slaves[i].port_id;
607 return internals->slave_count;
/*
 * Copy the active slave port ids of bonded_port_id into slaves[] and
 * return how many were copied. An active count above len takes an early
 * exit instead of copying.
 * NOTE(review): the early exit return values are not visible here.
 */
611 rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
614 struct bond_dev_private *internals;
616 if (valid_bonded_port_id(bonded_port_id) != 0)
622 internals = rte_eth_devices[bonded_port_id].data->dev_private;
624 if (internals->active_slave_count > len)
/* The length is a byte count equal to the entry count; this assumes
 * active_slaves entries are one byte each, like the uint8_t destination
 * (TODO confirm against the struct definition). */
627 memcpy(slaves, internals->active_slaves, internals->active_slave_count);
629 return internals->active_slave_count;
/*
 * Assign a user defined MAC address to the bonded device bonded_port_id.
 *
 * The address is applied with mac_address_set(); on success the device is
 * flagged as having a user defined MAC and, when slaves are attached, the
 * result of mac_address_slaves_update() is returned.
 * NOTE(review): the remaining return statements are not visible here.
 */
633 rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
634 struct ether_addr *mac_addr)
636 struct rte_eth_dev *bonded_eth_dev;
637 struct bond_dev_private *internals;
639 if (valid_bonded_port_id(bonded_port_id) != 0)
642 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
643 internals = bonded_eth_dev->data->dev_private;
645 /* Set MAC Address of Bonded Device */
646 if (mac_address_set(bonded_eth_dev, mac_addr))
/* Only reached when mac_address_set() reported success (returned 0). */
649 internals->user_defined_mac = 1;
651 /* Update all slave devices MACs*/
652 if (internals->slave_count > 0)
653 return mac_address_slaves_update(bonded_eth_dev);
/*
 * Drop any user defined MAC address and revert the bonded device to the
 * persisted MAC address of its primary slave, then propagate the result to
 * all slaves. With no slaves attached only the user_defined_mac flag is
 * cleared.
 *
 * bonded_port_id: port id of the bonded device.
 *
 * Returns the result of mac_address_slaves_update() when slaves are
 * present.
 * NOTE(review): the remaining return statements are not visible here.
 */
659 rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
661 struct rte_eth_dev *bonded_eth_dev;
662 struct bond_dev_private *internals;
unsigned int primary_idx;
664 if (valid_bonded_port_id(bonded_port_id) != 0)
667 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
668 internals = bonded_eth_dev->data->dev_private;
670 internals->user_defined_mac = 0;
672 if (internals->slave_count > 0) {
/* primary_port holds a port id, not a position in slaves[], so locate
 * the entry whose port_id matches before reading its persisted MAC. */
for (primary_idx = 0; primary_idx < internals->slave_count; primary_idx++) {
	if (internals->slaves[primary_idx].port_id == internals->primary_port)
		break;
}
/* Primary port not in the list: fall back to the first slave rather
 * than reading an entry beyond the populated part of slaves[]. */
if (primary_idx == internals->slave_count)
	primary_idx = 0;
673 /* Set MAC Address of Bonded Device */
674 if (mac_address_set(bonded_eth_dev,
675 &internals->slaves[primary_idx].persisted_mac_addr)
677 RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device");
680 /* Update all slave devices MAC addresses */
681 return mac_address_slaves_update(bonded_eth_dev);
683 /* No need to update anything as no slaves present */
/*
 * Select the transmit hashing policy of bonded_port_id.
 *
 * Each supported policy stores the policy value in balance_xmit_policy and
 * installs the matching hash function in xmit_hash: layer 2, layer 2+3 or
 * layer 3+4.
 * NOTE(review): the switch statement, the handling of other policy values
 * and the return statements are not visible here.
 */
688 rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
690 struct bond_dev_private *internals;
692 if (valid_bonded_port_id(bonded_port_id) != 0)
695 internals = rte_eth_devices[bonded_port_id].data->dev_private;
698 case BALANCE_XMIT_POLICY_LAYER2:
699 internals->balance_xmit_policy = policy;
700 internals->xmit_hash = xmit_l2_hash;
702 case BALANCE_XMIT_POLICY_LAYER23:
703 internals->balance_xmit_policy = policy;
704 internals->xmit_hash = xmit_l23_hash;
706 case BALANCE_XMIT_POLICY_LAYER34:
707 internals->balance_xmit_policy = policy;
708 internals->xmit_hash = xmit_l34_hash;
/*
 * Return the transmit hashing policy stored for bonded_port_id.
 * NOTE(review): the return for a non-bonded port is not visible here.
 */
718 rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
720 struct bond_dev_private *internals;
722 if (valid_bonded_port_id(bonded_port_id) != 0)
725 internals = rte_eth_devices[bonded_port_id].data->dev_private;
727 return internals->balance_xmit_policy;
/*
 * Set the link status polling interval of bonded_port_id.
 *
 * internal_ms: new interval in milliseconds, stored unchanged in
 *              link_status_polling_interval_ms (the parameter name is
 *              presumably a misspelling of interval_ms).
 * NOTE(review): the return statements are not visible here.
 */
731 rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
733 struct bond_dev_private *internals;
735 if (valid_bonded_port_id(bonded_port_id) != 0)
738 internals = rte_eth_devices[bonded_port_id].data->dev_private;
739 internals->link_status_polling_interval_ms = internal_ms;
/*
 * Return the link status polling interval, in milliseconds, stored for
 * bonded_port_id.
 * NOTE(review): the return for a non-bonded port is not visible here.
 */
745 rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id)
747 struct bond_dev_private *internals;
749 if (valid_bonded_port_id(bonded_port_id) != 0)
752 internals = rte_eth_devices[bonded_port_id].data->dev_private;
754 return internals->link_status_polling_interval_ms;
/*
 * Store delay_ms, in milliseconds, as the link down delay of
 * bonded_port_id.
 * NOTE(review): the return statements are not visible here.
 */
758 rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
761 struct bond_dev_private *internals;
763 if (valid_bonded_port_id(bonded_port_id) != 0)
766 internals = rte_eth_devices[bonded_port_id].data->dev_private;
767 internals->link_down_delay_ms = delay_ms;
/*
 * Return the link down delay, in milliseconds, stored for bonded_port_id.
 * NOTE(review): the return for a non-bonded port is not visible here.
 */
773 rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
775 struct bond_dev_private *internals;
777 if (valid_bonded_port_id(bonded_port_id) != 0)
780 internals = rte_eth_devices[bonded_port_id].data->dev_private;
782 return internals->link_down_delay_ms;
/*
 * Store delay_ms, in milliseconds, as the link up delay of bonded_port_id.
 * NOTE(review): the return statements are not visible here.
 */
786 rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
789 struct bond_dev_private *internals;
791 if (valid_bonded_port_id(bonded_port_id) != 0)
794 internals = rte_eth_devices[bonded_port_id].data->dev_private;
795 internals->link_up_delay_ms = delay_ms;
/*
 * Return the link up delay, in milliseconds, stored for bonded_port_id.
 * NOTE(review): the return for a non-bonded port is not visible here.
 */
801 rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id)
803 struct bond_dev_private *internals;
805 if (valid_bonded_port_id(bonded_port_id) != 0)
808 internals = rte_eth_devices[bonded_port_id].data->dev_private;
810 return internals->link_up_delay_ms;