4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include <rte_alarm.h>
39 #include <rte_malloc.h>
40 #include <rte_errno.h>
41 #include <rte_cycles.h>
43 #include "rte_eth_bond_private.h"
/* Debug-only logging helper. When RTE_LIBRTE_BOND_DEBUG_8023AD is defined,
 * MODE4_DEBUG logs at DEBUG level and prefixes the caller's message with the
 * value of bond_dbg_get_time_diff_ms(), the `slave_id` variable that must be
 * in scope at the call site, and the name of the calling function. */
45 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
46 #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
47 bond_dbg_get_time_diff_ms(), slave_id, \
48 __func__, ##__VA_ARGS__)
/* Reference tick value subtracted from `now` in bond_dbg_get_time_diff_ms(). */
50 static uint64_t start_time;
/* Returns the time elapsed since start_time in milliseconds: the tick
 * difference is multiplied by 1000 and divided by rte_get_tsc_hz().
 * NOTE(review): the lines that obtain `now` and initialise start_time are not
 * visible in this listing. */
53 bond_dbg_get_time_diff_ms(void)
61 return ((now - start_time) * 1000) / rte_get_tsc_hz();
/* Debug helper that logs the contents of one LACPDU at DEBUG level.
 *
 * The actor and partner system addresses are formatted as colon-separated hex
 * bytes, the bits set in the actor/partner state bytes are turned into
 * space-separated text, and the actor, partner, collector and terminator
 * fields are then printed with a single RTE_LOG call.
 *
 * NOTE(review): the collector part of the format string reads "\n, " - the
 * comma after the newline looks accidental; confirm before changing it.
 * NOTE(review): some lines of this function (declarations, parts of the format
 * string and of the argument list) are not visible in this listing.
 *
 * @param l LACPDU to print. */
65 bond_print_lacp(struct lacpdu *l)
69 char a_state[256] = { 0 };
70 char p_state[256] = { 0 };
72 static const char * const state_labels[] = {
73 "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
81 addr = l->actor.port_params.system.addr_bytes;
82 snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
83 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
85 addr = l->partner.port_params.system.addr_bytes;
86 snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
87 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
/* Walk the 8 state bits and append one "%s " entry per set bit (presumably
 * taken from state_labels - the argument line is not visible here). */
89 for (i = 0; i < 8; i++) {
90 if ((l->actor.state >> i) & 1) {
91 a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
95 if ((l->partner.state >> i) & 1) {
96 p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
/* Drop the trailing separator space, if one was written. */
101 if (a_len && a_state[a_len-1] == ' ')
102 a_state[a_len-1] = '\0';
104 if (p_len && p_state[p_len-1] == ' ')
105 p_state[p_len-1] = '\0';
107 RTE_LOG(DEBUG, PMD, "LACP: {\n"\
110 " actor={ tlv=%02X, len=%02X\n"\
111 " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
114 " partner={ tlv=%02X, len=%02X\n"\
115 " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
118 " collector={info=%02X, length=%02X, max_delay=%04X\n, " \
119 "type_term=%02X, terminator_length = %02X}\n",\
122 l->actor.tlv_type_info,\
123 l->actor.info_length,\
124 l->actor.port_params.system_priority,\
126 l->actor.port_params.key,\
127 l->actor.port_params.port_priority,\
128 l->actor.port_params.port_number,\
130 l->partner.tlv_type_info,\
131 l->partner.info_length,\
132 l->partner.port_params.system_priority,\
134 l->partner.port_params.key,\
135 l->partner.port_params.port_priority,\
136 l->partner.port_params.port_number,\
138 l->tlv_type_collector_info,\
139 l->collector_info_length,\
140 l->collector_max_delay,\
141 l->tlv_type_terminator,\
142 l->terminator_length);
/* BOND_PRINT_LACP forwards to bond_print_lacp() in the first definition; the
 * two definitions after it expand BOND_PRINT_LACP and MODE4_DEBUG to empty
 * statements. NOTE(review): the preprocessor lines that separate the two
 * variants (presumably the #else / #endif of the debug #ifdef) are not
 * visible in this listing. */
145 #define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
147 #define BOND_PRINT_LACP(lacpdu) do { } while (0)
148 #define MODE4_DEBUG(fmt, ...) do { } while (0)
/* Destination MAC address (01:80:C2:00:00:02) that tx_machine() copies into
 * the Ethernet header of every LACPDU it builds. */
151 static const struct ether_addr lacp_mac_addr = {
152 .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
/* Mode 4 state of every port, indexed by slave port id throughout this file. */
155 struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
/* Cancels the given timer.
 * NOTE(review): the body is not visible in this listing; presumably it stores
 * the value that timer_is_stopped() tests for - confirm. */
158 timer_cancel(uint64_t *timer)
/* Arms the timer: stores the current rte_rdtsc() value plus `timeout`, so
 * `timeout` is expressed in TSC ticks. */
164 timer_set(uint64_t *timer, uint64_t timeout)
166 *timer = rte_rdtsc() + timeout;
169 /* Forces given timer to be in expired state by storing the current
 * rte_rdtsc() value, which timer_is_expired() compares against a later
 * reading. */
171 timer_force_expired(uint64_t *timer)
173 *timer = rte_rdtsc();
/* Tells whether the timer is stopped (not armed).
 * NOTE(review): the body is not visible in this listing, so the value that
 * marks a stopped timer cannot be confirmed from here. */
177 timer_is_stopped(uint64_t *timer)
/* Returns non-zero when the value stored in the timer is lower than the
 * current rte_rdtsc() reading. */
183 timer_is_expired(uint64_t *timer)
185 return *timer < rte_rdtsc();
188 /* Timer is in running state if it is not stopped nor expired */
190 timer_is_running(uint64_t *timer)
192 return !timer_is_stopped(timer) && !timer_is_expired(timer);
/* Sets the given warning bits in port->warnings_to_show.
 * The update is a read / OR / rte_atomic16_cmpset() sequence that is retried
 * until the compare-and-set reports success, so bits set concurrently by
 * another caller are not lost.
 *
 * @param port  Port whose warning mask is updated.
 * @param flags Warning bits to add. */
196 set_warning_flags(struct port *port, uint16_t flags)
200 uint16_t new_flag = 0;
203 old = port->warnings_to_show;
204 new_flag = old | flags;
205 retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
206 } while (unlikely(retval == 0));
/* Logs the warnings accumulated for one slave.
 *
 * The pending bits are read from port->warnings_to_show and reset to 0 with a
 * compare-and-set retry loop. Output is throttled through port->warning_timer,
 * which is re-armed for BOND_8023AD_WARNINGS_PERIOD_MS (converted to TSC
 * ticks) before the messages are printed. One message is emitted for every
 * WRN_* bit that is set.
 *
 * The "unknown slow protocol frame type" and "unknown marker type" messages
 * are terminated with a newline like all other messages of this function, so
 * the following log record no longer runs onto the same line.
 *
 * NOTE(review): some lines of this function (declarations, early returns, the
 * RTE_LOG openers of the multi-line messages) are not visible in this listing.
 *
 * @param slave_id Port id of the slave, used as index into
 *                 mode_8023ad_ports[] and printed in each message. */
210 show_warnings(uint8_t slave_id)
212 struct port *port = &mode_8023ad_ports[slave_id];
/* Fetch the pending warning bits and clear them in one atomic step. */
216 warnings = port->warnings_to_show;
217 } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
/* Rate limiting: warning_timer is checked here and re-armed below. */
222 if (!timer_is_expired(&port->warning_timer))
226 timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
227 rte_get_tsc_hz() / 1000);
229 if (warnings & WRN_RX_QUEUE_FULL) {
231 "Slave %u: failed to enqueue LACP packet into RX ring.\n"
232 "Receive and transmit functions must be invoked on bonded\n"
233 "interface at least 10 times per second or LACP will not\n"
234 "work correctly\n", slave_id);
237 if (warnings & WRN_TX_QUEUE_FULL) {
239 "Slave %u: failed to enqueue LACP packet into TX ring.\n"
240 "Receive and transmit functions must be invoked on bonded\n"
241 "interface at least 10 times per second or LACP will not\n"
242 "work correctly\n", slave_id);
245 if (warnings & WRN_RX_MARKER_TO_FAST)
246 RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id);
248 if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
250 "Slave %u: ignoring unknown slow protocol frame type\n", slave_id);
253 if (warnings & WRN_UNKNOWN_MARKER_TYPE)
254 RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type\n", slave_id);
256 if (warnings & WRN_NOT_LACP_CAPABLE)
257 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
/* Puts the port's partner information into the defaulted state: partner_state
 * is set to STATE_LACP_ACTIVE and DEFAULTED is set in the actor state. */
261 record_default(struct port *port)
263 /* Record default parameters for partner. Partner admin parameters
264 * are not implemented so set them to arbitrary default (last known) and
265 * mark actor that partner is in defaulted state. */
266 port->partner_state = STATE_LACP_ACTIVE;
267 ACTOR_STATE_SET(port, DEFAULTED);
/* Receive state machine step for the slave identified by slave_id.
 * It works on mode_8023ad_ports[slave_id]; callers in this file pass either
 * the LACPDU dequeued for the port or NULL as the last argument.
 * NOTE(review): the @param names in the original description below (lacpdu,
 * port) do not match the visible parameters (internals, slave_id, ...).
 * NOTE(review): several lines of this function (the last parameter, local
 * declarations, else branches, returns, closing braces and two comment
 * terminators) are not visible in this listing. */
270 /** Function handles rx state machine.
272 * This function implements Receive State Machine from point 5.4.12 in
273 * 802.1AX documentation. It should be called periodically.
275 * @param lacpdu LACPDU received.
276 * @param port Port on which LACPDU was received.
279 rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
282 struct port *agg, *port = &mode_8023ad_ports[slave_id];
285 if (SM_FLAG(port, BEGIN)) {
286 /* Initialize stuff */
287 MODE4_DEBUG("-> INITIALIZE\n");
288 SM_FLAG_CLR(port, MOVED);
289 port->selected = UNSELECTED;
291 record_default(port);
293 ACTOR_STATE_CLR(port, EXPIRED);
294 timer_cancel(&port->current_while_timer);
296 /* DISABLED: On initialization partner is out of sync */
297 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
299 /* LACP DISABLED stuff if LACP not enabled on this port */
300 if (!SM_FLAG(port, LACP_ENABLED))
301 PARTNER_STATE_CLR(port, AGGREGATION);
303 PARTNER_STATE_SET(port, AGGREGATION);
306 if (!SM_FLAG(port, LACP_ENABLED)) {
307 /* Update parameters only if state changed */
308 if (!timer_is_stopped(&port->current_while_timer)) {
309 port->selected = UNSELECTED;
310 record_default(port);
311 PARTNER_STATE_CLR(port, AGGREGATION);
312 ACTOR_STATE_CLR(port, EXPIRED);
313 timer_cancel(&port->current_while_timer);
319 MODE4_DEBUG("LACP -> CURRENT\n");
320 BOND_PRINT_LACP(lacp);
321 /* Update selected flag. If partner parameters are defaulted assume they
322 * are match. If not defaulted compare LACP actor with ports parner
324 if (!ACTOR_STATE(port, DEFAULTED) &&
325 (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
326 || memcmp(&port->partner, &lacp->actor.port_params,
327 sizeof(port->partner)) != 0)) {
328 MODE4_DEBUG("selected <- UNSELECTED\n");
329 port->selected = UNSELECTED;
332 /* Record this PDU actor params as partner params */
333 memcpy(&port->partner, &lacp->actor.port_params,
334 sizeof(struct port_params));
335 port->partner_state = lacp->actor.state;
337 /* Partner parameters are not defaulted any more */
338 ACTOR_STATE_CLR(port, DEFAULTED);
340 /* If LACP partner params match this port actor params */
341 agg = &mode_8023ad_ports[port->aggregator_port_id];
342 bool match = port->actor.system_priority ==
343 lacp->partner.port_params.system_priority &&
344 is_same_ether_addr(&agg->actor.system,
345 &lacp->partner.port_params.system) &&
346 port->actor.port_priority ==
347 lacp->partner.port_params.port_priority &&
348 port->actor.port_number ==
349 lacp->partner.port_params.port_number;
351 /* Update NTT if partners information are outdated (xored and masked
353 uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
354 STATE_SYNCHRONIZATION | STATE_AGGREGATION;
356 if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
358 SM_FLAG_SET(port, NTT);
361 /* If LACP partner params match this port actor params */
362 if (match == true && ACTOR_STATE(port, AGGREGATION) ==
363 PARTNER_STATE(port, AGGREGATION))
364 PARTNER_STATE_SET(port, SYNCHRONIZATION);
365 else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
367 PARTNER_STATE_SET(port, SYNCHRONIZATION);
369 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
/* Restart current_while_timer with the timeout selected by the actor's
 * LACP_SHORT_TIMEOUT flag and leave the EXPIRED state. */
371 if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
372 timeout = internals->mode4.short_timeout;
374 timeout = internals->mode4.long_timeout;
376 timer_set(&port->current_while_timer, timeout);
377 ACTOR_STATE_CLR(port, EXPIRED);
378 return; /* No state change */
381 /* If CURRENT state timer is not running (stopped or expired)
382 * transit to EXPIRED state from DISABLED or CURRENT */
383 if (!timer_is_running(&port->current_while_timer)) {
384 ACTOR_STATE_SET(port, EXPIRED);
385 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
386 PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
387 timer_set(&port->current_while_timer, internals->mode4.short_timeout);
/* Periodic transmission state machine step for the slave identified by
 * slave_id. It manages port->periodic_timer and raises the NTT flag; the
 * timeout used is internals->mode4.fast_periodic_timeout or
 * slow_periodic_timeout depending on the partner's LACP_SHORT_TIMEOUT state.
 * NOTE(review): the @param in the original description (port) does not match
 * the visible parameters (internals, slave_id).
 * NOTE(review): several lines of this function (comment delimiters, a
 * declaration, else branches, returns, closing braces) are not visible in
 * this listing. */
392 * Function handles periodic tx state machine.
394 * Function implements Periodic Transmission state machine from point 5.4.13
395 * in 802.1AX documentation. It should be called periodically.
397 * @param port Port to handle state machine.
400 periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
402 struct port *port = &mode_8023ad_ports[slave_id];
403 /* Calculate if either side is LACP enabled */
405 uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
406 PARTNER_STATE(port, LACP_ACTIVE);
408 uint8_t is_partner_fast, was_partner_fast;
409 /* No periodic is on BEGIN, LACP DISABLE or when both sides are passive */
410 if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
411 timer_cancel(&port->periodic_timer);
412 timer_force_expired(&port->tx_machine_timer);
413 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
415 MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
416 SM_FLAG(port, BEGIN) ? "begind " : "",
417 SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
418 active ? "LACP active " : "LACP pasive ");
/* Compare the partner's current timeout mode with the one remembered in the
 * PARTNER_SHORT_TIMEOUT state machine flag. */
422 is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
423 was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
425 /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
426 * Other case: check if timer expire or partners settings changed. */
427 if (!timer_is_stopped(&port->periodic_timer)) {
428 if (timer_is_expired(&port->periodic_timer)) {
429 SM_FLAG_SET(port, NTT);
430 } else if (is_partner_fast != was_partner_fast) {
431 /* Partners timeout was slow and now it is fast -> send LACP.
432 * In other case (was fast and now it is slow) just switch
433 * timeout to slow without forcing send of LACP (because standard
435 if (!is_partner_fast)
436 SM_FLAG_SET(port, NTT);
438 return; /* Nothing changed */
441 /* Handle state transition to FAST/SLOW LACP timeout */
442 if (is_partner_fast) {
443 timeout = internals->mode4.fast_periodic_timeout;
444 SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
446 timeout = internals->mode4.slow_periodic_timeout;
447 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
450 timer_set(&port->periodic_timer, timeout);
/* Mux state machine step for the slave identified by slave_id. It drives the
 * SYNCHRONIZATION, COLLECTING and DISTRIBUTING bits of port->actor_state
 * using port->selected, port->wait_while_timer and the partner's state, and
 * raises NTT whenever it changes the actor state.
 * NOTE(review): the @param in the original description (port) does not match
 * the visible parameters (internals, slave_id).
 * NOTE(review): several lines of this function (comment delimiters, the rest
 * of state_mask, else branches, returns, RTE_LOG openers, closing braces) are
 * not visible in this listing. */
454 * Function handles mux state machine.
456 * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
457 * It should be called periodically.
459 * @param port Port to handle state machine.
462 mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
464 struct port *port = &mode_8023ad_ports[slave_id];
466 /* Save current state for later use */
467 const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
470 /* Enter DETACHED state on BEGIN condition or from any other state if
471 * port was unselected */
472 if (SM_FLAG(port, BEGIN) ||
473 port->selected == UNSELECTED || (port->selected == STANDBY &&
474 (port->actor_state & state_mask) != 0)) {
475 /* detach mux from aggregator */
476 port->actor_state &= ~state_mask;
477 /* Set ntt to true if BEGIN condition or transition from any other state
478 * which is indicated that wait_while_timer was started */
479 if (SM_FLAG(port, BEGIN) ||
480 !timer_is_stopped(&port->wait_while_timer)) {
481 SM_FLAG_SET(port, NTT);
482 MODE4_DEBUG("-> DETACHED\n");
484 timer_cancel(&port->wait_while_timer);
487 if (timer_is_stopped(&port->wait_while_timer)) {
488 if (port->selected == SELECTED || port->selected == STANDBY) {
489 timer_set(&port->wait_while_timer,
490 internals->mode4.aggregate_wait_timeout);
492 MODE4_DEBUG("DETACHED -> WAITING\n");
494 /* Waiting state entered */
498 /* Transit next state if port is ready */
499 if (!timer_is_expired(&port->wait_while_timer))
502 if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
503 !PARTNER_STATE(port, SYNCHRONIZATION)) {
504 /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
505 * sync transit to ATTACHED state. */
506 ACTOR_STATE_CLR(port, DISTRIBUTING);
507 ACTOR_STATE_CLR(port, COLLECTING);
508 /* Clear actor sync to activate transit ATTACHED in condition below */
509 ACTOR_STATE_CLR(port, SYNCHRONIZATION);
510 MODE4_DEBUG("Out of sync -> ATTACHED\n");
513 if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
514 /* attach mux to aggregator */
515 RTE_VERIFY((port->actor_state & (STATE_COLLECTING |
516 STATE_DISTRIBUTING)) == 0);
518 ACTOR_STATE_SET(port, SYNCHRONIZATION);
519 SM_FLAG_SET(port, NTT);
520 MODE4_DEBUG("ATTACHED Entered\n");
521 } else if (!ACTOR_STATE(port, COLLECTING)) {
522 /* Start collecting if in sync */
523 if (PARTNER_STATE(port, SYNCHRONIZATION)) {
524 MODE4_DEBUG("ATTACHED -> COLLECTING\n");
525 ACTOR_STATE_SET(port, COLLECTING);
526 SM_FLAG_SET(port, NTT);
528 } else if (ACTOR_STATE(port, COLLECTING)) {
529 /* Check if partner is in COLLECTING state. If so this port can
530 * distribute frames to it */
531 if (!ACTOR_STATE(port, DISTRIBUTING)) {
532 if (PARTNER_STATE(port, COLLECTING)) {
533 /* Enable DISTRIBUTING if partner is collecting */
534 ACTOR_STATE_SET(port, DISTRIBUTING);
535 SM_FLAG_SET(port, NTT);
536 MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
538 "Bond %u: slave id %u distributing started.\n",
539 internals->port_id, slave_id);
542 if (!PARTNER_STATE(port, COLLECTING)) {
543 /* Disable DISTRIBUTING (enter COLLECTING state) if partner
544 * is not collecting */
545 ACTOR_STATE_CLR(port, DISTRIBUTING);
546 SM_FLAG_SET(port, NTT);
547 MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
549 "Bond %u: slave id %u distributing stopped.\n",
550 internals->port_id, slave_id);
/* Transmit state machine step for the slave identified by slave_id.
 * When NTT is set and tx_machine_timer has expired, it allocates an mbuf from
 * port->mbuf_pool, fills in an Ethernet header plus LACPDU describing the
 * port's actor and partner information, and enqueues it on port->tx_ring.
 * After a successful enqueue tx_machine_timer is re-armed with
 * internals->mode4.tx_period_timeout and NTT is cleared. If the ring reports
 * -ENOBUFS the mbuf is freed and WRN_TX_QUEUE_FULL is recorded.
 * NOTE(review): several lines of this function (comment delimiters, returns,
 * closing braces) are not visible in this listing. */
557 * Function handles transmit state machine.
559 * Function implements Transmit Machine from point 5.4.16 in 802.1AX
565 tx_machine(struct bond_dev_private *internals, uint8_t slave_id)
567 struct port *agg, *port = &mode_8023ad_ports[slave_id];
569 struct rte_mbuf *lacp_pkt = NULL;
570 struct lacpdu_header *hdr;
571 struct lacpdu *lacpdu;
573 /* If periodic timer is not running periodic machine is in NO PERIODIC and
574 * according to 802.3ax standard tx machine should not transmit any frames
575 * and set ntt to false. */
576 if (timer_is_stopped(&port->periodic_timer))
577 SM_FLAG_CLR(port, NTT);
579 if (!SM_FLAG(port, NTT))
582 if (!timer_is_expired(&port->tx_machine_timer))
585 lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
586 if (lacp_pkt == NULL) {
587 RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
/* The frame consists of exactly one struct lacpdu_header. */
591 lacp_pkt->data_len = sizeof(*hdr);
592 lacp_pkt->pkt_len = sizeof(*hdr);
594 hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
596 /* Source and destination MAC */
597 ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
598 rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
599 hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
601 lacpdu = &hdr->lacpdu;
602 memset(lacpdu, 0, sizeof(*lacpdu));
604 /* Initialize LACP part */
605 lacpdu->subtype = SLOW_SUBTYPE_LACP;
606 lacpdu->version_number = 1;
/* Actor TLV: this port's parameters, with the system address taken from the
 * port's aggregator. */
609 lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
610 lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
611 memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
612 sizeof(port->actor));
613 agg = &mode_8023ad_ports[port->aggregator_port_id];
614 ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
615 lacpdu->actor.state = port->actor_state;
/* Partner TLV: the partner parameters currently recorded for this port. */
618 lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
619 lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
620 memcpy(&lacpdu->partner.port_params, &port->partner,
621 sizeof(struct port_params));
622 lacpdu->partner.state = port->partner_state;
625 lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
626 lacpdu->collector_info_length = 0x10;
627 lacpdu->collector_max_delay = 0;
629 lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
630 lacpdu->terminator_length = 0;
632 if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
633 /* If TX ring full, drop packet and free message. Retransmission
634 * will happen in next function call. */
635 rte_pktmbuf_free(lacp_pkt);
636 set_warning_flags(port, WRN_TX_QUEUE_FULL);
640 MODE4_DEBUG("sending LACP frame\n");
641 BOND_PRINT_LACP(lacpdu);
643 timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
644 SM_FLAG_CLR(port, NTT);
/* Chooses the aggregator for the slave identified by slave_id.
 * The active slaves are scanned for a port that is its own aggregator and
 * whose actor key and partner system priority, system address and key match
 * those of this port; the partner system address must also be non-zero. If
 * the scan runs to the end (i == slaves_count), the port becomes its own
 * aggregator. port->selected is set to SELECTED at the end.
 * NOTE(review): the @param names in the original description
 * (bond_dev_private, port_pos) do not match the visible parameters
 * (internals, slave_id).
 * NOTE(review): several lines of this function (comment delimiters, a
 * declaration, part of the match condition, continue/break, else, closing
 * braces) are not visible in this listing. */
648 * Function assigns port to aggregator.
650 * @param bond_dev_private Pointer to bond_dev_private structure.
651 * @param port_pos Port to assign.
654 selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
656 struct port *agg, *port;
657 uint8_t slaves_count, new_agg_id, i;
660 slaves = internals->active_slaves;
661 slaves_count = internals->active_slave_count;
662 port = &mode_8023ad_ports[slave_id];
664 /* Search for aggregator suitable for this port */
665 for (i = 0; i < slaves_count; ++i) {
666 agg = &mode_8023ad_ports[slaves[i]];
667 /* Skip ports that are not aggregators */
668 if (agg->aggregator_port_id != slaves[i])
671 /* Actors system ID is not checked since all slave device have the same
672 * ID (MAC address). */
673 if ((agg->actor.key == port->actor.key &&
674 agg->partner.system_priority == port->partner.system_priority &&
675 is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
676 && (agg->partner.key == port->partner.key)) &&
677 is_zero_ether_addr(&port->partner.system) != 1 &&
679 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
685 /* By default, port uses itself as aggregator */
686 if (i == slaves_count)
687 new_agg_id = slave_id;
689 new_agg_id = slaves[i];
691 if (new_agg_id != port->aggregator_port_id) {
692 port->aggregator_port_id = new_agg_id;
694 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
695 "\t%s aggregator ID=%3u\n",
696 port->aggregator_port_id,
697 port->aggregator_port_id == slave_id ?
698 "aggregator not found, using default" : "aggregator found",
699 port->aggregator_port_id);
702 port->selected = SELECTED;
705 /* Function maps DPDK speed to bonding speed stored in key field.
 * Each visible ETH_LINK_SPEED_* case selects the BOND_LINK_SPEED_KEY_*
 * constant of the same speed.
 * NOTE(review): the value used for ETH_LINK_SPEED_AUTONEG, any default case
 * and the return statement are on lines not visible in this listing. */
707 link_speed_key(uint16_t speed) {
711 case ETH_LINK_SPEED_AUTONEG:
714 case ETH_LINK_SPEED_10:
715 key_speed = BOND_LINK_SPEED_KEY_10M;
717 case ETH_LINK_SPEED_100:
718 key_speed = BOND_LINK_SPEED_KEY_100M;
720 case ETH_LINK_SPEED_1000:
721 key_speed = BOND_LINK_SPEED_KEY_1000M;
723 case ETH_LINK_SPEED_10G:
724 key_speed = BOND_LINK_SPEED_KEY_10G;
726 case ETH_LINK_SPEED_20G:
727 key_speed = BOND_LINK_SPEED_KEY_20G;
729 case ETH_LINK_SPEED_40G:
730 key_speed = BOND_LINK_SPEED_KEY_40G;
/* Alarm callback that drives mode 4 for one bonded device.
 * The first loop refreshes every active slave's actor key (derived from link
 * status, speed and duplex) and actor system address. The second loop feeds
 * at most one queued LACPDU per slave to rx_machine() and then runs the
 * periodic, mux and tx machines and the selection logic. Finally the
 * callback re-registers itself with rte_eal_alarm_set().
 * NOTE(review): several lines of this function (declarations, else branches,
 * continue, closing braces) are not visible in this listing.
 *
 * @param arg The bonded device (struct rte_eth_dev *). */
741 bond_mode_8023ad_periodic_cb(void *arg)
743 struct rte_eth_dev *bond_dev = arg;
744 struct bond_dev_private *internals = bond_dev->data->dev_private;
746 struct rte_eth_link link_info;
747 struct ether_addr slave_addr;
753 /* Update link status on each port */
754 for (i = 0; i < internals->active_slave_count; i++) {
757 slave_id = internals->active_slaves[i];
758 rte_eth_link_get(slave_id, &link_info);
759 rte_eth_macaddr_get(slave_id, &slave_addr);
/* Key layout: speed key shifted left by one, OR-ed with the full duplex bit. */
761 if (link_info.link_status != 0) {
762 key = link_speed_key(link_info.link_speed) << 1;
763 if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
764 key |= BOND_LINK_FULL_DUPLEX_KEY;
768 port = &mode_8023ad_ports[slave_id];
/* The key is kept in big-endian form in port->actor. */
770 key = rte_cpu_to_be_16(key);
771 if (key != port->actor.key) {
772 if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
773 set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
775 port->actor.key = key;
776 SM_FLAG_SET(port, NTT);
779 if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
780 ether_addr_copy(&slave_addr, &port->actor.system);
781 if (port->aggregator_port_id == slave_id)
782 SM_FLAG_SET(port, NTT);
/* Second pass: run the state machines of every active slave. */
786 for (i = 0; i < internals->active_slave_count; i++) {
787 slave_id = internals->active_slaves[i];
788 port = &mode_8023ad_ports[slave_id];
790 if ((port->actor.key &
791 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
793 SM_FLAG_SET(port, BEGIN);
795 /* LACP is disabled on half duplex or link is down */
796 if (SM_FLAG(port, LACP_ENABLED)) {
797 /* If port was enabled set it to BEGIN state */
798 SM_FLAG_CLR(port, LACP_ENABLED);
799 ACTOR_STATE_CLR(port, DISTRIBUTING);
800 ACTOR_STATE_CLR(port, COLLECTING);
803 /* Skip this port processing */
807 SM_FLAG_SET(port, LACP_ENABLED);
809 /* Find LACP packet to this port. Do not check subtype, it is done in
810 * function that queued packet */
811 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
812 struct rte_mbuf *lacp_pkt = pkt;
813 struct lacpdu_header *lacp;
815 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
816 RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
818 /* This is LACP frame so pass it to rx_machine */
819 rx_machine(internals, slave_id, &lacp->lacpdu);
820 rte_pktmbuf_free(lacp_pkt);
822 rx_machine(internals, slave_id, NULL);
824 periodic_machine(internals, slave_id);
825 mux_machine(internals, slave_id);
826 tx_machine(internals, slave_id);
827 selection_logic(internals, slave_id);
829 SM_FLAG_CLR(port, BEGIN);
830 show_warnings(slave_id);
/* Re-arm the alarm so this callback runs again after
 * mode4.update_timeout_us. */
833 rte_eal_alarm_set(internals->mode4.update_timeout_us,
834 bond_mode_8023ad_periodic_cb, arg);
/* Initialises the mode 4 state of a slave that is being activated.
 * The actor and partner parameters are reset from `initial`, the actor port
 * number is set to slave_id + 1, the state machine flags are set to BEGIN,
 * the port becomes its own aggregator and promiscuous mode is enabled on the
 * slave. Unless port->mbuf_pool already exists, the per-port mbuf pool and
 * the RX and TX rings are created; any failure there ends in rte_panic().
 * NOTE(review): socket_id is read through rte_eth_devices[slave_id].pci_dev
 * without a NULL check - confirm every slave device has a pci_dev.
 * NOTE(review): several lines of this function (declarations, part of the
 * `initial` initialiser, the early return, a pool-creation argument, closing
 * braces) are not visible in this listing.
 *
 * @param bond_dev Bonded device the slave belongs to.
 * @param slave_id Port id of the slave; it must not be in the active list. */
838 bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
840 struct bond_dev_private *internals = bond_dev->data->dev_private;
842 struct port *port = &mode_8023ad_ports[slave_id];
843 struct port_params initial = {
845 .system_priority = rte_cpu_to_be_16(0xFFFF),
846 .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
847 .port_priority = rte_cpu_to_be_16(0x00FF),
851 char mem_name[RTE_ETH_NAME_MAX_LEN];
853 unsigned element_size;
855 /* Given slave must not be in active list */
856 RTE_VERIFY(find_slave_by_id(internals->active_slaves,
857 internals->active_slave_count, slave_id) == internals->active_slave_count);
859 memcpy(&port->actor, &initial, sizeof(struct port_params));
860 /* Standard requires that port ID must be greater than 0.
861 * Add 1 to get corresponding port_number */
862 port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1);
864 memcpy(&port->partner, &initial, sizeof(struct port_params));
867 port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
868 port->partner_state = STATE_LACP_ACTIVE;
869 port->sm_flags = SM_FLAGS_BEGIN;
871 /* use this port as aggregator */
872 port->aggregator_port_id = slave_id;
873 rte_eth_promiscuous_enable(slave_id);
875 timer_cancel(&port->warning_timer);
877 if (port->mbuf_pool != NULL)
880 RTE_VERIFY(port->rx_ring == NULL);
881 RTE_VERIFY(port->tx_ring == NULL);
882 socket_id = rte_eth_devices[slave_id].pci_dev->numa_node;
884 element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf)
885 + RTE_PKTMBUF_HEADROOM;
887 /* How big memory pool should be? If driver will not
888 * free packets quick enough there will be ENOMEM in tx_machine.
889 * For now give 511 pkts * max number of queued TX packets per slave.
890 * Hope it will be enough. */
891 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
892 port->mbuf_pool = rte_mempool_create(mem_name,
893 BOND_MODE_8023AX_SLAVE_TX_PKTS * 512 - 1,
895 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
896 sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
897 NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD);
899 /* Any memory allocation failure in initialization is critical because
900 * resources can't be free, so reinitialization is impossible. */
901 if (port->mbuf_pool == NULL) {
902 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
903 slave_id, mem_name, rte_strerror(rte_errno));
906 snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
907 port->rx_ring = rte_ring_create(mem_name,
908 rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
910 if (port->rx_ring == NULL) {
911 rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
912 mem_name, rte_strerror(rte_errno));
915 /* TX ring is at least one pkt longer to make room for marker packet. */
916 snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
917 port->tx_ring = rte_ring_create(mem_name,
918 rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
920 if (port->tx_ring == NULL) {
921 rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
922 mem_name, rte_strerror(rte_errno));
/* Removes a slave from mode 4 operation.
 * Every active port that uses slave_id as its aggregator is marked UNSELECTED
 * and made its own aggregator again. The slave itself is marked UNSELECTED,
 * the state bits in the visible mask are cleared from its actor state, and
 * all packets still queued on its RX and TX rings are freed.
 * NOTE(review): several lines of this function (the second parameter,
 * declarations, continue, the rest of the state mask, the return statement)
 * are not visible in this listing. */
927 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
930 struct bond_dev_private *internals = bond_dev->data->dev_private;
935 /* Given slave must be in active list */
936 RTE_VERIFY(find_slave_by_id(internals->active_slaves,
937 internals->active_slave_count, slave_id) < internals->active_slave_count);
939 /* Exclude slave from transmit policy. If this slave is an aggregator
940 * make all aggregated slaves unselected to force selection logic
941 * to select suitable aggregator for this port. */
942 for (i = 0; i < internals->active_slave_count; i++) {
943 port = &mode_8023ad_ports[internals->active_slaves[i]];
944 if (port->aggregator_port_id != slave_id)
947 port->selected = UNSELECTED;
949 /* Use default aggregator */
950 port->aggregator_port_id = internals->active_slaves[i];
953 port = &mode_8023ad_ports[slave_id];
954 port->selected = UNSELECTED;
955 port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
/* Drain both rings so no queued mbuf is leaked. */
958 while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
959 rte_pktmbuf_free((struct rte_mbuf *)pkt);
961 while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
962 rte_pktmbuf_free((struct rte_mbuf *)pkt);
/* Re-reads the MAC address of every active slave into its actor system
 * address. The periodic callback is stopped first and restarted at the end
 * if the bonded device is started. When the address of a slave that acts as
 * an aggregator has changed, NTT is set on every active port that uses this
 * aggregator.
 * NOTE(review): the `continue` statements and closing braces of this function
 * are not visible in this listing. */
967 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
969 struct bond_dev_private *internals = bond_dev->data->dev_private;
970 struct ether_addr slave_addr;
971 struct port *slave, *agg_slave;
972 uint8_t slave_id, i, j;
974 bond_mode_8023ad_stop(bond_dev);
976 for (i = 0; i < internals->active_slave_count; i++) {
977 slave_id = internals->active_slaves[i];
978 slave = &mode_8023ad_ports[slave_id];
979 rte_eth_macaddr_get(slave_id, &slave_addr);
981 if (is_same_ether_addr(&slave_addr, &slave->actor.system))
984 ether_addr_copy(&slave_addr, &slave->actor.system);
985 /* Do nothing if this port is not an aggregator. In other case
986 * Set NTT flag on every port that use this aggregator. */
987 if (slave->aggregator_port_id != slave_id)
990 for (j = 0; j < internals->active_slave_count; j++) {
991 agg_slave = &mode_8023ad_ports[internals->active_slaves[j]];
992 if (agg_slave->aggregator_port_id == slave_id)
993 SM_FLAG_SET(agg_slave, NTT);
997 if (bond_dev->data->dev_started)
998 bond_mode_8023ad_start(bond_dev);
1002 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1003 struct rte_eth_bond_8023ad_conf *conf)
1005 struct bond_dev_private *internals = dev->data->dev_private;
1006 struct mode8023ad_private *mode4 = &internals->mode4;
1007 uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1009 conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1010 conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1011 conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1012 conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1013 conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1014 conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1015 conf->update_timeout_ms = mode4->update_timeout_us / 1000;
/* Applies a mode 4 configuration to a bonded device.
 * The millisecond values of *conf are multiplied by rte_get_tsc_hz() / 1000
 * and stored as tick counts in internals->mode4; update_timeout_ms is stored
 * in microseconds.
 * NOTE(review): the first group of assignments loads the compile-time BOND_*
 * defaults into conf. The condition guarding it is not visible in this
 * listing - presumably conf == NULL with conf redirected to def_conf;
 * confirm.
 *
 * @param dev  Bonded device.
 * @param conf Configuration to apply. */
1019 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1020 struct rte_eth_bond_8023ad_conf *conf)
1022 struct rte_eth_bond_8023ad_conf def_conf;
1023 struct bond_dev_private *internals = dev->data->dev_private;
1024 struct mode8023ad_private *mode4 = &internals->mode4;
1025 uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1029 conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1030 conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1031 conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1032 conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1033 conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1034 conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1035 conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1036 conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
/* Convert milliseconds to TSC ticks (microseconds for the update period). */
1039 mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1040 mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1041 mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1042 mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1043 mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1044 mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1045 mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1046 mode4->update_timeout_us = conf->update_timeout_ms * 1000;
/* Enables mode 4 by calling bond_mode_8023ad_activate_slave() once per active
 * slave.
 * NOTE(review): the loop index `i` is passed as the slave id, whereas the rest
 * of this file obtains slave ids from internals->active_slaves[i]. This looks
 * like a bug, but bond_mode_8023ad_activate_slave() also asserts that the id
 * is not in the active list, so both need to be reviewed together.
 * NOTE(review): the declaration of `i` and the return statement are not
 * visible in this listing. */
1050 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1052 struct bond_dev_private *internals = bond_dev->data->dev_private;
1055 for (i = 0; i < internals->active_slave_count; i++)
1056 bond_mode_8023ad_activate_slave(bond_dev, i);
/* Starts mode 4 processing by scheduling bond_mode_8023ad_periodic_cb() with
 * rte_eal_alarm_set() and returns that call's result.
 * NOTE(review): the first period is the compile-time
 * BOND_MODE_8023AX_UPDATE_TIMEOUT_MS, not the configured
 * mode4.update_timeout_us that the callback uses when it re-arms itself -
 * confirm this is intended. */
1062 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1064 return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
1065 &bond_mode_8023ad_periodic_cb, bond_dev);
/* Stops mode 4 processing by cancelling the alarm registered for
 * bond_mode_8023ad_periodic_cb() with this bonded device as argument. */
1069 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1071 rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
/* Handles one slow protocol frame received on a slave.
 * Marker frames of type MARKER_TLV_TYPE_INFO are turned into a response in
 * place (TLV type and source MAC rewritten) and enqueued on port->tx_ring,
 * rate limited through port->rx_marker_timer. LACP frames are enqueued on
 * port->rx_ring for the periodic callback. In the failure cases a WRN_* code
 * is chosen; the last two lines record it with set_warning_flags() and free
 * the packet.
 * NOTE(review): likely() wraps the "unknown marker type" test, i.e. the
 * failure branch is hinted as the common case - confirm this is intended.
 * NOTE(review): several lines of this function (a declaration, goto/return
 * statements, the `do {` opener, else, labels, closing braces) are not
 * visible in this listing.
 *
 * @param internals Private data of the bonded device.
 * @param slave_id  Port id of the slave the frame was received on.
 * @param pkt       The received slow protocol frame. */
1075 bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
1076 uint8_t slave_id, struct rte_mbuf *pkt)
1078 struct mode8023ad_private *mode4 = &internals->mode4;
1079 struct port *port = &mode_8023ad_ports[slave_id];
1080 struct marker_header *m_hdr;
1081 uint64_t marker_timer, old_marker_timer;
1083 uint8_t wrn, subtype;
1084 /* If packet is a marker, we send response now by reusing given packet
1085 * and update only source MAC, destination MAC is multicast so don't
1086 * update it. Other frames will be handled later by state machines */
1087 subtype = rte_pktmbuf_mtod(pkt,
1088 struct slow_protocol_frame *)->slow_protocol.subtype;
1090 if (subtype == SLOW_SUBTYPE_MARKER) {
1091 m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
1093 if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
1094 wrn = WRN_UNKNOWN_MARKER_TYPE;
1098 /* Setup marker timer. Do it in loop in case of concurrent access. */
1100 old_marker_timer = port->rx_marker_timer;
1101 if (!timer_is_expired(&old_marker_timer)) {
1102 wrn = WRN_RX_MARKER_TO_FAST;
1106 timer_set(&marker_timer, mode4->rx_marker_timeout);
1107 retval = rte_atomic64_cmpset(&port->rx_marker_timer,
1108 old_marker_timer, marker_timer);
1109 } while (unlikely(retval == 0));
1111 m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
1112 rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
1114 if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
1116 port->rx_marker_timer = 0;
1117 wrn = WRN_TX_QUEUE_FULL;
1120 } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
1121 if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
1122 /* If RX ring full free lacpdu message and drop packet */
1123 wrn = WRN_RX_QUEUE_FULL;
1127 wrn = WRN_UNKNOWN_SLOW_TYPE;
1134 set_warning_flags(port, wrn);
1135 rte_pktmbuf_free(pkt);
/* Public entry point that reads the mode 4 configuration of a bonded port.
 * The port id is checked with valid_bonded_port_id() and the work is done by
 * bond_mode_8023ad_conf_get().
 * NOTE(review): the error returns and any further argument checks are on
 * lines not visible in this listing.
 *
 * @param port_id Port id of the bonded device.
 * @param conf    Output configuration structure. */
1139 rte_eth_bond_8023ad_conf_get(uint8_t port_id,
1140 struct rte_eth_bond_8023ad_conf *conf)
1142 struct rte_eth_dev *bond_dev;
1144 if (valid_bonded_port_id(port_id) != 0)
1150 bond_dev = &rte_eth_devices[port_id];
1151 bond_mode_8023ad_conf_get(bond_dev, conf);
/* Public entry point that applies a mode 4 configuration to a bonded port.
 * The port id is checked with valid_bonded_port_id(). The configuration is
 * treated as invalid when slow_periodic_ms, long_timeout_ms,
 * aggregate_wait_timeout_ms, tx_period_ms, rx_marker_period_ms or
 * update_timeout_ms is 0, when fast_periodic_ms is not lower than
 * slow_periodic_ms, or when short_timeout_ms is not lower than
 * long_timeout_ms. The settings are stored by bond_mode_8023ad_setup().
 * NOTE(review): the error returns and any guard around the sanity check
 * (e.g. for conf == NULL) are on lines not visible in this listing.
 *
 * @param port_id Port id of the bonded device.
 * @param conf    Configuration to apply. */
1156 rte_eth_bond_8023ad_setup(uint8_t port_id,
1157 struct rte_eth_bond_8023ad_conf *conf)
1159 struct rte_eth_dev *bond_dev;
1161 if (valid_bonded_port_id(port_id) != 0)
1165 /* Basic sanity check */
1166 if (conf->slow_periodic_ms == 0 ||
1167 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1168 conf->long_timeout_ms == 0 ||
1169 conf->short_timeout_ms >= conf->long_timeout_ms ||
1170 conf->aggregate_wait_timeout_ms == 0 ||
1171 conf->tx_period_ms == 0 ||
1172 conf->rx_marker_period_ms == 0 ||
1173 conf->update_timeout_ms == 0) {
1174 RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
1179 bond_dev = &rte_eth_devices[port_id];
1180 bond_mode_8023ad_setup(bond_dev, conf);
/* Public entry point that reports the mode 4 state of one slave.
 * It requires a non-NULL info pointer, a valid bonded port in
 * BONDING_MODE_8023AD and a slave that is in the bonded device's active
 * list. It then copies the slave's selected status, actor and partner state
 * and parameters, and aggregator port id into *info.
 * NOTE(review): the error returns, the declaration of `port` and the final
 * return are on lines not visible in this listing.
 *
 * @param port_id  Port id of the bonded device.
 * @param slave_id Port id of the slave.
 * @param info     Output structure. */
1186 rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
1187 struct rte_eth_bond_8023ad_slave_info *info)
1189 struct rte_eth_dev *bond_dev;
1190 struct bond_dev_private *internals;
1193 if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1194 rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1197 bond_dev = &rte_eth_devices[port_id];
1199 internals = bond_dev->data->dev_private;
1200 if (find_slave_by_id(internals->active_slaves,
1201 internals->active_slave_count, slave_id) ==
1202 internals->active_slave_count)
1205 port = &mode_8023ad_ports[slave_id];
1206 info->selected = port->selected;
1208 info->actor_state = port->actor_state;
1209 rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1211 info->partner_state = port->partner_state;
1212 rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1214 info->agg_port_id = port->aggregator_port_id;