4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
41 #include "rte_eth_bond.h"
42 #include "rte_eth_bond_private.h"
43 #include "rte_eth_bond_8023ad_private.h"
/* Default value stored in link_status_polling_interval_ms (milliseconds)
 * when a bonded device is created. */
45 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
/*
 * Check whether eth_dev is driven by the link bonding PMD by comparing the
 * name of its PCI driver with driver_name.
 *
 * The last statement returns the strncmp() result, so 0 means the two names
 * are identical.
 * NOTE(review): the statements guarded by the two checks below are not
 * visible in this view; presumably they return a non-zero value - confirm.
 */
48 valid_bonded_ethdev(struct rte_eth_dev *eth_dev)
52 /* Check valid pointer */
53 if (eth_dev->driver->pci_drv.name == NULL || driver_name == NULL)
56 /* Check string lengths are equal */
57 len = strlen(driver_name);
58 if (strlen(eth_dev->driver->pci_drv.name) != len)
62 return strncmp(eth_dev->driver->pci_drv.name, driver_name, len);
/*
 * Verify that port_id is lower than the number of ethdev ports reported by
 * rte_eth_dev_count(). An out of range id is logged as an error.
 * NOTE(review): the return statements are not visible in this view; callers
 * in this file test the result for non-zero.
 */
66 valid_port_id(uint8_t port_id)
68 /* Verify that port id is valid */
69 int ethdev_count = rte_eth_dev_count();
70 if (port_id >= ethdev_count) {
71 RTE_BOND_LOG(ERR, "Port Id %d is greater than rte_eth_dev_count %d",
72 port_id, ethdev_count);
/*
 * Verify that port_id is a valid port id (valid_port_id) and that the
 * matching ethdev is a bonded device (valid_bonded_ethdev). A port that is
 * not a bonded device is logged as an error.
 */
80 valid_bonded_port_id(uint8_t port_id)
82 /* Verify that port id's are valid */
83 if (valid_port_id(port_id))
86 /* Verify that bonded_port_id refers to a bonded port */
87 if (valid_bonded_ethdev(&rte_eth_devices[port_id])) {
88 RTE_BOND_LOG(ERR, "Specified port Id %d is not a bonded eth_dev device",
/*
 * Verify that port_id is a valid port id and that the matching ethdev is
 * not itself a bonded device (valid_bonded_ethdev returns 0 for a bonded
 * device, hence the negated test).
 */
97 valid_slave_port_id(uint8_t port_id)
99 /* Verify that port id's are valid */
100 if (valid_port_id(port_id))
103 /* Verify that port_id refers to a non bonded port */
104 if (!valid_bonded_ethdev(&rte_eth_devices[port_id]))
/*
 * Add port_id to the active slave list of the bonded device eth_dev.
 * In 802.3ad mode bond_mode_8023ad_activate_slave() is called for the slave
 * before it is appended to the list.
 */
111 activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
113 struct bond_dev_private *internals = eth_dev->data->dev_private;
115 if (internals->mode == BONDING_MODE_8023AD)
116 bond_mode_8023ad_activate_slave(eth_dev, port_id);
/* Verify that the active slave count is below the array size minus one */
118 RTE_VERIFY(internals->active_slave_count <
119 (RTE_DIM(internals->active_slaves) - 1));
/* Append the port at the end of the active list */
121 internals->active_slaves[internals->active_slave_count] = port_id;
122 internals->active_slave_count++;
/*
 * Remove port_id from the active slave list of the bonded device eth_dev.
 *
 * In 802.3ad mode bond_mode_8023ad_stop() and
 * bond_mode_8023ad_deactivate_slave() are called first, and
 * bond_mode_8023ad_start() is called again at the end when the device is
 * started.
 * NOTE(review): a statement between the slave_pos test and the memmove is
 * not visible in this view; presumably it adjusts active_count - confirm.
 */
126 deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
129 struct bond_dev_private *internals = eth_dev->data->dev_private;
130 uint8_t active_count = internals->active_slave_count;
132 if (internals->mode == BONDING_MODE_8023AD) {
133 bond_mode_8023ad_stop(eth_dev);
134 bond_mode_8023ad_deactivate_slave(eth_dev, port_id);
/* Locate the slave in the active list */
137 slave_pos = find_slave_by_id(internals->active_slaves, active_count,
140 /* If slave was not at the end of the list
141 * shift active slaves up active array list */
142 if (slave_pos < active_count) {
144 memmove(internals->active_slaves + slave_pos,
145 internals->active_slaves + slave_pos + 1,
146 (active_count - slave_pos) *
147 sizeof(internals->active_slaves[0]));
/* Store the local count back into the device private data */
150 RTE_VERIFY(active_count < RTE_DIM(internals->active_slaves));
151 internals->active_slave_count = active_count;
153 if (eth_dev->data->dev_started && internals->mode == BONDING_MODE_8023AD)
154 bond_mode_8023ad_start(eth_dev);
/*
 * Walk the physical memory segment layout, stopping at RTE_MAX_MEMSEG or at
 * the first segment with a NULL address, and track the highest socket_id
 * seen. Per the comment below, the socket count is that maximum plus one.
 * NOTE(review): the return statement is not visible in this view.
 */
158 number_of_sockets(void)
162 const struct rte_memseg *ms = rte_eal_get_physmem_layout();
164 for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
165 if (sockets < ms[i].socket_id)
166 sockets = ms[i].socket_id;
169 /* Number of sockets = maximum socket_id + 1 */
/* Driver name assigned to the PCI driver of every bonded device created in
 * this file; valid_bonded_ethdev() compares against it to recognise bonded
 * devices. */
173 const char *driver_name = "Link Bonding PMD";
176 rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
178 struct rte_pci_device *pci_dev = NULL;
179 struct bond_dev_private *internals = NULL;
180 struct rte_eth_dev *eth_dev = NULL;
181 struct eth_driver *eth_drv = NULL;
182 struct rte_pci_driver *pci_drv = NULL;
183 struct rte_pci_id *pci_id_table = NULL;
184 /* now do all data allocation - for eth_dev structure, dummy pci driver
185 * and internal (private) data
189 RTE_BOND_LOG(ERR, "Invalid name specified");
193 if (socket_id >= number_of_sockets()) {
195 "Invalid socket id specified to create bonded device on.");
199 pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
200 if (pci_dev == NULL) {
201 RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket");
205 eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id);
206 if (eth_drv == NULL) {
207 RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket");
211 pci_drv = rte_zmalloc_socket(name, sizeof(*pci_drv), 0, socket_id);
212 if (pci_drv == NULL) {
213 RTE_BOND_LOG(ERR, "Unable to malloc pci_drv on socket");
216 pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id);
217 if (pci_id_table == NULL) {
218 RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket");
222 pci_drv->id_table = pci_id_table;
224 pci_drv->id_table->device_id = PCI_ANY_ID;
225 pci_drv->id_table->subsystem_device_id = PCI_ANY_ID;
226 pci_drv->id_table->vendor_id = PCI_ANY_ID;
227 pci_drv->id_table->subsystem_vendor_id = PCI_ANY_ID;
229 pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC;
231 internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
232 if (internals == NULL) {
233 RTE_BOND_LOG(ERR, "Unable to malloc internals on socket");
237 /* reserve an ethdev entry */
238 eth_dev = rte_eth_dev_allocate(name);
239 if (eth_dev == NULL) {
240 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
244 pci_dev->numa_node = socket_id;
245 pci_drv->name = driver_name;
247 eth_drv->pci_drv = (struct rte_pci_driver)(*pci_drv);
248 eth_dev->driver = eth_drv;
250 eth_dev->data->dev_private = internals;
251 eth_dev->data->nb_rx_queues = (uint16_t)1;
252 eth_dev->data->nb_tx_queues = (uint16_t)1;
254 TAILQ_INIT(&(eth_dev->callbacks));
256 eth_dev->data->dev_link.link_status = 0;
258 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
261 eth_dev->data->dev_started = 0;
262 eth_dev->data->promiscuous = 0;
263 eth_dev->data->scattered_rx = 0;
264 eth_dev->data->all_multicast = 0;
266 eth_dev->dev_ops = &default_dev_ops;
267 eth_dev->pci_dev = pci_dev;
269 rte_spinlock_init(&internals->lock);
271 internals->port_id = eth_dev->data->port_id;
272 internals->mode = BONDING_MODE_INVALID;
273 internals->current_primary_port = 0;
274 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
275 internals->xmit_hash = xmit_l2_hash;
276 internals->user_defined_mac = 0;
277 internals->link_props_set = 0;
279 internals->link_status_polling_enabled = 0;
281 internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS;
282 internals->link_down_delay_ms = 0;
283 internals->link_up_delay_ms = 0;
285 internals->slave_count = 0;
286 internals->active_slave_count = 0;
287 internals->rx_offload_capa = 0;
288 internals->tx_offload_capa = 0;
290 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
291 memset(internals->slaves, 0, sizeof(internals->slaves));
293 /* Set mode 4 default configuration */
294 bond_mode_8023ad_setup(eth_dev, NULL);
295 if (bond_ethdev_mode_set(eth_dev, mode)) {
296 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
297 eth_dev->data->port_id, mode);
301 return eth_dev->data->port_id;
309 rte_free(pci_id_table);
/*
 * Attach slave_port_id to the bonded device bonded_port_id. No lock is
 * taken here; rte_eth_bond_slave_add() calls this function with the bonded
 * device spinlock held.
 *
 * The slave must pass valid_slave_port_id() and must not already be listed
 * as a slave of any bonded device. When no slave is attached yet, the new
 * slave provides the bonded device MAC (unless user defined), the link
 * properties, the primary port and the offload capabilities. The offload
 * capabilities of further slaves are ANDed into the stored ones.
 * NOTE(review): several branch and return lines are not visible in this
 * view, so the exact nesting of the statements below must be confirmed
 * against the complete file.
 */
318 __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
320 struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
321 struct bond_dev_private *internals;
322 struct bond_dev_private *temp_internals;
323 struct rte_eth_link link_props;
324 struct rte_eth_dev_info dev_info;
328 if (valid_slave_port_id(slave_port_id) != 0)
331 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
332 internals = bonded_eth_dev->data->dev_private;
334 /* Verify that new slave device is not already a slave of another
336 for (i = rte_eth_dev_count()-1; i >= 0; i--) {
337 if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) {
338 temp_internals = rte_eth_devices[i].data->dev_private;
340 for (j = 0; j < temp_internals->slave_count; j++) {
341 /* Device already a slave of a bonded device */
342 if (temp_internals->slaves[j].port_id == slave_port_id) {
343 RTE_BOND_LOG(ERR, "Slave port %d is already a slave",
351 slave_eth_dev = &rte_eth_devices[slave_port_id];
353 /* Add slave details to bonded device */
354 slave_add(internals, slave_eth_dev);
/* Fetch the slave device info; its offload capabilities are used below */
356 memset(&dev_info, 0, sizeof(dev_info));
357 rte_eth_dev_info_get(slave_port_id, &dev_info);
359 if (internals->slave_count < 1) {
360 /* if MAC is not user defined then use MAC of first slave add to
362 if (!internals->user_defined_mac)
363 mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs);
365 /* Inherit eth dev link properties from first slave */
366 link_properties_set(bonded_eth_dev,
367 &(slave_eth_dev->data->dev_link));
369 /* Make primary slave */
370 internals->primary_port = slave_port_id;
372 /* Take the first dev's offload capabilities */
373 internals->rx_offload_capa = dev_info.rx_offload_capa;
374 internals->tx_offload_capa = dev_info.tx_offload_capa;
377 /* Check slave link properties are supported if props are set,
378 * all slaves must be the same */
379 if (internals->link_props_set) {
380 if (link_properties_valid(&(bonded_eth_dev->data->dev_link),
381 &(slave_eth_dev->data->dev_link))) {
383 "Slave port %d link speed/duplex not supported",
388 link_properties_set(bonded_eth_dev,
389 &(slave_eth_dev->data->dev_link));
/* Keep only the offload capabilities shared with the new slave */
391 internals->rx_offload_capa &= dev_info.rx_offload_capa;
392 internals->tx_offload_capa &= dev_info.tx_offload_capa;
395 internals->slave_count++;
397 /* Update all slave devices MACs*/
398 mac_address_slaves_update(bonded_eth_dev);
400 if (bonded_eth_dev->data->dev_started) {
401 if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) {
402 RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d",
408 /* Register link status change callback with bonded device pointer as
410 rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
411 bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id);
413 /* If bonded device is started then we can add the slave to our active
415 if (bonded_eth_dev->data->dev_started) {
416 rte_eth_link_get_nowait(slave_port_id, &link_props);
418 if (link_props.link_status == 1)
419 activate_slave(bonded_eth_dev, slave_port_id);
// Public entry point for attaching a slave to a bonded device.
// Validates bonded_port_id, then runs __eth_bond_slave_add_lock_free()
// while holding the spinlock of the bonded device and keeps its result in
// retval.
// NOTE(review): the return statements are not visible in this view.
426 rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
428 struct rte_eth_dev *bonded_eth_dev;
429 struct bond_dev_private *internals;
433 /* Verify that port id's are valid bonded and slave ports */
434 if (valid_bonded_port_id(bonded_port_id) != 0)
437 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
438 internals = bonded_eth_dev->data->dev_private;
/* The lock is held for the whole add operation */
440 rte_spinlock_lock(&internals->lock);
442 retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id);
444 rte_spinlock_unlock(&internals->lock);
/*
 * Detach slave_port_id from the bonded device bonded_port_id. No lock is
 * taken here; rte_eth_bond_slave_remove() calls this function with the
 * bonded device spinlock held.
 *
 * The slave is deactivated when it is in the active list, its link status
 * callback is unregistered, its persisted MAC address is restored and it is
 * removed from the slave list. The current primary port, the link
 * properties, the bonded device MAC (unless user defined) and the offload
 * capabilities are then updated according to the slaves that remain.
 * NOTE(review): several assignment, branch and return lines are not visible
 * in this view (for example how slave_idx is set by the search loop).
 */
450 __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
452 struct rte_eth_dev *bonded_eth_dev;
453 struct bond_dev_private *internals;
457 if (valid_slave_port_id(slave_port_id) != 0)
460 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
461 internals = bonded_eth_dev->data->dev_private;
463 /* first remove from active slave list */
464 slave_idx = find_slave_by_id(internals->active_slaves,
465 internals->active_slave_count, slave_port_id);
467 if (slave_idx < internals->active_slave_count)
468 deactivate_slave(bonded_eth_dev, slave_port_id);
471 /* now find in slave list */
472 for (i = 0; i < internals->slave_count; i++)
473 if (internals->slaves[i].port_id == slave_port_id) {
479 RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d",
480 internals->slave_count);
484 /* Un-register link status change callback with bonded device pointer as
486 rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
487 bond_ethdev_lsc_event_callback,
488 &rte_eth_devices[bonded_port_id].data->port_id);
490 /* Restore original MAC address of slave device */
491 mac_address_set(&rte_eth_devices[slave_port_id],
492 &(internals->slaves[slave_idx].persisted_mac_addr));
494 slave_remove(internals, &rte_eth_devices[slave_port_id]);
496 /* first slave in the active list will be the primary by default,
497 * otherwise use first device in list */
498 if (internals->current_primary_port == slave_port_id) {
499 if (internals->active_slave_count > 0)
500 internals->current_primary_port = internals->active_slaves[0];
501 else if (internals->slave_count > 0)
502 internals->current_primary_port = internals->slaves[0].port_id;
504 internals->primary_port = 0;
507 if (internals->active_slave_count < 1) {
508 /* reset device link properties as no slaves are active */
509 link_properties_reset(&rte_eth_devices[bonded_port_id]);
511 /* if no slaves are any longer attached to bonded device and MAC is not
512 * user defined then clear MAC of bonded device as it will be reset
513 * when a new slave is added */
514 if (internals->slave_count < 1 && !internals->user_defined_mac)
515 memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0,
516 sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs)));
/* With no slave left there are no offload capabilities to advertise */
518 if (internals->slave_count == 0) {
519 internals->rx_offload_capa = 0;
520 internals->tx_offload_capa = 0;
/*
 * Public entry point for detaching a slave from a bonded device.
 * Validates bonded_port_id, then runs __eth_bond_slave_remove_lock_free()
 * while holding the spinlock of the bonded device and keeps its result in
 * retval.
 * NOTE(review): the return statements are not visible in this view.
 */
526 rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
528 struct rte_eth_dev *bonded_eth_dev;
529 struct bond_dev_private *internals;
532 if (valid_bonded_port_id(bonded_port_id) != 0)
535 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
536 internals = bonded_eth_dev->data->dev_private;
538 rte_spinlock_lock(&internals->lock);
540 retval = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id);
542 rte_spinlock_unlock(&internals->lock);
/*
 * Validate bonded_port_id and apply the bonding mode through
 * bond_ethdev_mode_set(), whose result is returned.
 */
548 rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
550 if (valid_bonded_port_id(bonded_port_id) != 0)
553 return bond_ethdev_mode_set(&rte_eth_devices[bonded_port_id], mode);
/*
 * Return the mode stored in the private data of the bonded device, after
 * validating bonded_port_id.
 */
557 rte_eth_bond_mode_get(uint8_t bonded_port_id)
559 struct bond_dev_private *internals;
561 if (valid_bonded_port_id(bonded_port_id) != 0)
564 internals = rte_eth_devices[bonded_port_id].data->dev_private;
566 return internals->mode;
/*
 * Make slave_port_id the user defined primary port of the bonded device.
 * Both port ids are validated, the choice is recorded in
 * user_defined_primary_port and primary_port, and it is applied through
 * bond_ethdev_primary_set().
 * NOTE(review): nothing visible here checks that slave_port_id is attached
 * to this bonded device, and the device lock is not taken - confirm that
 * this is intended.
 */
570 rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
572 struct bond_dev_private *internals;
574 if (valid_bonded_port_id(bonded_port_id) != 0)
577 if (valid_slave_port_id(slave_port_id) != 0)
580 internals = rte_eth_devices[bonded_port_id].data->dev_private;
582 internals->user_defined_primary_port = 1;
583 internals->primary_port = slave_port_id;
585 bond_ethdev_primary_set(internals, slave_port_id);
/*
 * Return current_primary_port of the bonded device. A device without
 * slaves is handled separately before that.
 * NOTE(review): the statement taken when slave_count < 1 is not visible in
 * this view.
 */
591 rte_eth_bond_primary_get(uint8_t bonded_port_id)
593 struct bond_dev_private *internals;
595 if (valid_bonded_port_id(bonded_port_id) != 0)
598 internals = rte_eth_devices[bonded_port_id].data->dev_private;
600 if (internals->slave_count < 1)
603 return internals->current_primary_port;
/*
 * Copy the port ids of all slaves of the bonded device into slaves[] and
 * return the slave count. len is the capacity of slaves[]; a slave count
 * above len is handled before the copy loop.
 * NOTE(review): the statement taken when slave_count > len is not visible
 * in this view.
 */
607 rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
609 struct bond_dev_private *internals;
612 if (valid_bonded_port_id(bonded_port_id) != 0)
618 internals = rte_eth_devices[bonded_port_id].data->dev_private;
620 if (internals->slave_count > len)
623 for (i = 0; i < internals->slave_count; i++)
624 slaves[i] = internals->slaves[i].port_id;
626 return internals->slave_count;
/*
 * Copy the active slave list of the bonded device into slaves[] and return
 * the active slave count. An active slave count above len is handled before
 * the copy.
 * NOTE(review): the statement taken when active_slave_count > len is not
 * visible in this view.
 */
630 rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
633 struct bond_dev_private *internals;
635 if (valid_bonded_port_id(bonded_port_id) != 0)
641 internals = rte_eth_devices[bonded_port_id].data->dev_private;
643 if (internals->active_slave_count > len)
/* The element count is used as byte count; this assumes that active_slaves
 * entries are one byte each, like slaves[] - TODO confirm */
646 memcpy(slaves, internals->active_slaves, internals->active_slave_count);
648 return internals->active_slave_count;
/*
 * Set mac_addr as MAC address of the bonded device and flag it as user
 * defined. When slaves are attached, the result of
 * mac_address_slaves_update() is returned.
 * NOTE(review): the remaining return statements are not visible in this
 * view.
 */
652 rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
653 struct ether_addr *mac_addr)
655 struct rte_eth_dev *bonded_eth_dev;
656 struct bond_dev_private *internals;
658 if (valid_bonded_port_id(bonded_port_id) != 0)
661 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
662 internals = bonded_eth_dev->data->dev_private;
664 /* Set MAC Address of Bonded Device */
665 if (mac_address_set(bonded_eth_dev, mac_addr))
668 internals->user_defined_mac = 1;
670 /* Update all slave devices MACs*/
671 if (internals->slave_count > 0)
672 return mac_address_slaves_update(bonded_eth_dev);
678 rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
680 struct rte_eth_dev *bonded_eth_dev;
681 struct bond_dev_private *internals;
683 if (valid_bonded_port_id(bonded_port_id) != 0)
686 bonded_eth_dev = &rte_eth_devices[bonded_port_id];
687 internals = bonded_eth_dev->data->dev_private;
689 internals->user_defined_mac = 0;
691 if (internals->slave_count > 0) {
692 /* Set MAC Address of Bonded Device */
693 if (mac_address_set(bonded_eth_dev,
694 &internals->slaves[internals->primary_port].persisted_mac_addr)
696 RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device");
699 /* Update all slave devices MAC addresses */
700 return mac_address_slaves_update(bonded_eth_dev);
702 /* No need to update anything as no slaves present */
/*
 * Select the transmit policy of the bonded device. Each policy handled
 * below is stored in balance_xmit_policy together with its hash function:
 * LAYER2 uses xmit_l2_hash, LAYER23 uses xmit_l23_hash and LAYER34 uses
 * xmit_l34_hash.
 * NOTE(review): the switch statement, its default branch and the return
 * statements are not visible in this view.
 */
707 rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
709 struct bond_dev_private *internals;
711 if (valid_bonded_port_id(bonded_port_id) != 0)
714 internals = rte_eth_devices[bonded_port_id].data->dev_private;
717 case BALANCE_XMIT_POLICY_LAYER2:
718 internals->balance_xmit_policy = policy;
719 internals->xmit_hash = xmit_l2_hash;
721 case BALANCE_XMIT_POLICY_LAYER23:
722 internals->balance_xmit_policy = policy;
723 internals->xmit_hash = xmit_l23_hash;
725 case BALANCE_XMIT_POLICY_LAYER34:
726 internals->balance_xmit_policy = policy;
727 internals->xmit_hash = xmit_l34_hash;
/*
 * Return balance_xmit_policy of the bonded device, after validating
 * bonded_port_id.
 */
737 rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
739 struct bond_dev_private *internals;
741 if (valid_bonded_port_id(bonded_port_id) != 0)
744 internals = rte_eth_devices[bonded_port_id].data->dev_private;
746 return internals->balance_xmit_policy;
/*
 * Store internal_ms as link_status_polling_interval_ms of the bonded
 * device, after validating bonded_port_id.
 * NOTE(review): the parameter name internal_ms looks like a typo for
 * interval_ms.
 */
750 rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
752 struct bond_dev_private *internals;
754 if (valid_bonded_port_id(bonded_port_id) != 0)
757 internals = rte_eth_devices[bonded_port_id].data->dev_private;
758 internals->link_status_polling_interval_ms = internal_ms;
/*
 * Return link_status_polling_interval_ms of the bonded device, after
 * validating bonded_port_id.
 */
764 rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id)
766 struct bond_dev_private *internals;
768 if (valid_bonded_port_id(bonded_port_id) != 0)
771 internals = rte_eth_devices[bonded_port_id].data->dev_private;
773 return internals->link_status_polling_interval_ms;
/*
 * Store delay_ms as link_down_delay_ms of the bonded device, after
 * validating bonded_port_id.
 */
777 rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
780 struct bond_dev_private *internals;
782 if (valid_bonded_port_id(bonded_port_id) != 0)
785 internals = rte_eth_devices[bonded_port_id].data->dev_private;
786 internals->link_down_delay_ms = delay_ms;
/*
 * Return link_down_delay_ms of the bonded device, after validating
 * bonded_port_id.
 */
792 rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
794 struct bond_dev_private *internals;
796 if (valid_bonded_port_id(bonded_port_id) != 0)
799 internals = rte_eth_devices[bonded_port_id].data->dev_private;
801 return internals->link_down_delay_ms;
/*
 * Store delay_ms as link_up_delay_ms of the bonded device, after validating
 * bonded_port_id.
 */
805 rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
808 struct bond_dev_private *internals;
810 if (valid_bonded_port_id(bonded_port_id) != 0)
813 internals = rte_eth_devices[bonded_port_id].data->dev_private;
814 internals->link_up_delay_ms = delay_ms;
/*
 * Return link_up_delay_ms of the bonded device, after validating
 * bonded_port_id.
 */
820 rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id)
822 struct bond_dev_private *internals;
824 if (valid_bonded_port_id(bonded_port_id) != 0)
827 internals = rte_eth_devices[bonded_port_id].data->dev_private;
829 return internals->link_up_delay_ms;