4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include <rte_alarm.h>
39 #include <rte_malloc.h>
40 #include <rte_errno.h>
41 #include <rte_cycles.h>
43 #include "rte_eth_bond_private.h"
45 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
/* Debug-build trace macro. Emits a DEBUG level PMD log line prefixed with the
 * value of bond_dbg_get_time_diff_ms(), a variable named slave_id taken from
 * the calling scope, and the name of the calling function. */
46 #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
47 bond_dbg_get_time_diff_ms(), slave_id, \
48 __func__, ##__VA_ARGS__)
/* Reference TSC reading that debug timestamps are measured from.
 * NOTE(review): the assignment to start_time is not visible in this excerpt. */
50 static uint64_t start_time;
/* Returns the time elapsed since start_time in milliseconds: the tick
 * difference is multiplied by 1000 and divided by the TSC frequency.
 * NOTE(review): now is set on a line not shown here - presumably the current
 * TSC value; confirm. */
53 bond_dbg_get_time_diff_ms(void)
61 return ((now - start_time) * 1000) / rte_get_tsc_hz();
/* Debug helper: formats the actor and partner information of the LACPDU l
 * and writes all of its fields to the log in a single DEBUG level message. */
65 bond_print_lacp(struct lacpdu *l)
69 char a_state[256] = { 0 };
70 char p_state[256] = { 0 };
72 static const char * const state_labels[] = {
73 "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
/* Render the actor and partner system addresses as colon separated hex. */
81 addr = l->actor.port_params.system.addr_bytes;
82 snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
83 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
85 addr = l->partner.port_params.system.addr_bytes;
86 snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
87 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
/* For each of the 8 state bits that is set, append a string followed by a
 * space to the matching state buffer; the remaining buffer size bounds each
 * write. NOTE(review): the string argument is on a line not visible here -
 * presumably state_labels[i]. */
89 for (i = 0; i < 8; i++) {
90 if ((l->actor.state >> i) & 1) {
91 a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
95 if ((l->partner.state >> i) & 1) {
96 p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
/* Drop the trailing space left behind by the loop. */
101 if (a_len && a_state[a_len-1] == ' ')
102 a_state[a_len-1] = '\0';
104 if (p_len && p_state[p_len-1] == ' ')
105 p_state[p_len-1] = '\0';
/* Single log call; the argument list below follows the order of the
 * conversion specifiers in the format string. */
107 RTE_LOG(DEBUG, PMD, "LACP: {\n"\
110 " actor={ tlv=%02X, len=%02X\n"\
111 " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
114 " partner={ tlv=%02X, len=%02X\n"\
115 " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
118 " collector={info=%02X, length=%02X, max_delay=%04X\n, " \
119 "type_term=%02X, terminator_length = %02X}\n",\
122 l->actor.tlv_type_info,\
123 l->actor.info_length,\
124 l->actor.port_params.system_priority,\
126 l->actor.port_params.key,\
127 l->actor.port_params.port_priority,\
128 l->actor.port_params.port_number,\
130 l->partner.tlv_type_info,\
131 l->partner.info_length,\
132 l->partner.port_params.system_priority,\
134 l->partner.port_params.key,\
135 l->partner.port_params.port_priority,\
136 l->partner.port_params.port_number,\
138 l->tlv_type_collector_info,\
139 l->collector_info_length,\
140 l->collector_max_delay,\
141 l->tlv_type_terminator,\
142 l->terminator_length);
/* Debug build: BOND_PRINT_LACP dumps the LACPDU through bond_print_lacp(). */
145 #define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
/* Fallback definitions: both macros expand to an empty statement.
 * NOTE(review): the preprocessor branch lines separating the two sets of
 * definitions are not visible in this excerpt. */
147 #define BOND_PRINT_LACP(lacpdu) do { } while (0)
148 #define MODE4_DEBUG(fmt, ...) do { } while (0)
/* Multicast destination MAC address that tx_machine() copies into the
 * Ethernet header of every LACPDU it builds. */
151 static const struct ether_addr lacp_mac_addr = {
152 .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
/* Mode 4 per-port state table; indexed by slave port id throughout this
 * file. Not static, so it is visible to other translation units. */
155 struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
/* Timer helpers. A timer is a uint64_t holding an absolute TSC deadline:
 * timer_set() stores rte_rdtsc() + timeout and timer_is_expired() reports
 * whether the stored value is below the current rte_rdtsc().
 * NOTE(review): the bodies of timer_cancel() and timer_is_stopped() are not
 * visible in this excerpt, so the encoding of the stopped state should be
 * confirmed against the full file. */
158 timer_cancel(uint64_t *timer)
/* Arms the timer to expire timeout TSC ticks from now. */
164 timer_set(uint64_t *timer, uint64_t timeout)
166 *timer = rte_rdtsc() + timeout;
169 /* Forces given timer to be in expired state. */
171 timer_force_expired(uint64_t *timer)
173 *timer = rte_rdtsc();
/* NOTE(review): body not visible in this excerpt. */
177 timer_is_stopped(uint64_t *timer)
/* True once the current TSC value is strictly greater than the stored one. */
183 timer_is_expired(uint64_t *timer)
185 return *timer < rte_rdtsc();
188 /* Timer is in running state if it is not stopped nor expired */
190 timer_is_running(uint64_t *timer)
192 return !timer_is_stopped(timer) && !timer_is_expired(timer);
/* ORs flags into port->warnings_to_show. The value is read, combined with
 * flags and written back with a 16-bit compare-and-set; the sequence is
 * repeated for as long as the compare-and-set returns 0. */
196 set_warning_flags(struct port *port, uint16_t flags)
200 uint16_t new_flag = 0;
203 old = port->warnings_to_show;
204 new_flag = old | flags;
205 retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
206 } while (unlikely(retval == 0));
/* Reports the warnings accumulated for slave_id.
 * The pending warning bits are fetched and reset to zero with a
 * compare-and-set loop, then one message is logged for every bit that was
 * set. Every message ends with a newline so that consecutive log entries do
 * not run together.
 * NOTE(review): some control-flow lines of this function are not visible in
 * this excerpt (for example what happens when no warning is pending or while
 * warning_timer has not expired). */
210 show_warnings(uint8_t slave_id)
212 struct port *port = &mode_8023ad_ports[slave_id];
/* Take the pending warning bits and clear them in one atomic step. */
216 warnings = port->warnings_to_show;
217 } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
/* Rate limiting: warning_timer is re-armed for
 * BOND_8023AD_WARNINGS_PERIOD_MS milliseconds, converted to TSC ticks. */
222 if (!timer_is_expired(&port->warning_timer))
226 timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
227 rte_get_tsc_hz() / 1000);
229 if (warnings & WRN_RX_QUEUE_FULL) {
231 "Slave %u: failed to enqueue LACP packet into RX ring.\n"
232 "Receive and transmit functions must be invoked on bonded\n"
233 "interface at least 10 times per second or LACP will not\n"
234 "work correctly\n", slave_id);
237 if (warnings & WRN_TX_QUEUE_FULL) {
239 "Slave %u: failed to enqueue LACP packet into TX ring.\n"
240 "Receive and transmit functions must be invoked on bonded\n"
241 "interface at least 10 times per second or LACP will not\n"
242 "work correctly\n", slave_id);
245 if (warnings & WRN_RX_MARKER_TO_FAST)
246 RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id);
248 if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
250 "Slave %u: ignoring unknown slow protocol frame type\n", slave_id);
253 if (warnings & WRN_UNKNOWN_MARKER_TYPE)
254 RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type\n", slave_id);
256 if (warnings & WRN_NOT_LACP_CAPABLE)
257 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
/* Puts the partner information of port into the defaulted state: the
 * partner state is reset to STATE_LACP_ACTIVE alone and DEFAULTED is flagged
 * on the actor through ACTOR_STATE_SET. */
261 record_default(struct port *port)
263 /* Record default parameters for partner. Partner admin parameters
264 * are not implemented so set them to arbitrary default (last known) and
265 * mark actor that partner is in defaulted state. */
266 port->partner_state = STATE_LACP_ACTIVE;
267 ACTOR_STATE_SET(port, DEFAULTED);
270 /** Function handles rx state machine.
272 * This function implements Receive State Machine from point 5.4.12 in
273 * 802.1AX documentation. It should be called periodically.
275 * @param lacpdu LACPDU received.
276 * @param port Port on which LACPDU was received.
 *
 * NOTE(review): the parameter names above do not match the visible
 * signature, which takes the bonding private data (internals) and the slave
 * port id (slave_id); the port is looked up in mode_8023ad_ports[slave_id].
 * The third parameter is declared on a line not visible in this excerpt;
 * the periodic callback passes a pointer to the received LACPDU, or NULL
 * when nothing was dequeued.
279 rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
282 struct port *agg, *port = &mode_8023ad_ports[slave_id];
285 if (SM_FLAG(port, BEGIN)) {
286 /* Initialize stuff */
287 MODE4_DEBUG("-> INITIALIZE\n");
288 SM_FLAG_CLR(port, MOVED);
289 port->selected = UNSELECTED;
291 record_default(port);
293 ACTOR_STATE_CLR(port, EXPIRED);
294 timer_cancel(&port->current_while_timer);
296 /* DISABLED: On initialization partner is out of sync */
297 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
299 /* LACP DISABLED stuff if LACP not enabled on this port */
300 if (!SM_FLAG(port, LACP_ENABLED))
301 PARTNER_STATE_CLR(port, AGGREGATION);
303 PARTNER_STATE_SET(port, AGGREGATION);
/* LACP disabled on this port: fall back to defaulted partner information,
 * but only once, while current_while_timer is still not stopped. */
306 if (!SM_FLAG(port, LACP_ENABLED)) {
307 /* Update parameters only if state changed */
308 if (!timer_is_stopped(&port->current_while_timer)) {
309 port->selected = UNSELECTED;
310 record_default(port);
311 PARTNER_STATE_CLR(port, AGGREGATION);
312 ACTOR_STATE_CLR(port, EXPIRED);
313 timer_cancel(&port->current_while_timer);
/* NOTE(review): the condition guarding the section below is on lines not
 * visible in this excerpt; the section dereferences lacp, so it presumably
 * runs only when an LACPDU was received. */
319 MODE4_DEBUG("LACP -> CURRENT\n");
320 BOND_PRINT_LACP(lacp);
321 /* Update selected flag. If partner parameters are defaulted assume they
322 * are match. If not defaulted compare LACP actor with ports partner
324 if (!ACTOR_STATE(port, DEFAULTED) &&
325 (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
326 || memcmp(&port->partner, &lacp->actor.port_params,
327 sizeof(port->partner)) != 0)) {
328 MODE4_DEBUG("selected <- UNSELECTED\n");
329 port->selected = UNSELECTED;
332 /* Record this PDU actor params as partner params */
333 memcpy(&port->partner, &lacp->actor.port_params,
334 sizeof(struct port_params));
335 port->partner_state = lacp->actor.state;
337 /* Partner parameters are not defaulted any more */
338 ACTOR_STATE_CLR(port, DEFAULTED);
340 /* If LACP partner params match this port actor params */
341 agg = &mode_8023ad_ports[port->aggregator_port_id];
342 bool match = port->actor.system_priority ==
343 lacp->partner.port_params.system_priority &&
344 is_same_ether_addr(&agg->actor.system,
345 &lacp->partner.port_params.system) &&
346 port->actor.port_priority ==
347 lacp->partner.port_params.port_priority &&
348 port->actor.port_number ==
349 lacp->partner.port_params.port_number;
351 /* Update NTT if partners information are outdated (xored and masked
353 uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
354 STATE_SYNCHRONIZATION | STATE_AGGREGATION;
356 if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
358 SM_FLAG_SET(port, NTT);
361 /* If LACP partner params match this port actor params */
362 if (match == true && ACTOR_STATE(port, AGGREGATION) ==
363 PARTNER_STATE(port, AGGREGATION))
364 PARTNER_STATE_SET(port, SYNCHRONIZATION);
365 else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
367 PARTNER_STATE_SET(port, SYNCHRONIZATION);
369 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
/* Restart current_while_timer with the short or the long timeout, chosen by
 * the actor LACP_SHORT_TIMEOUT state bit. */
371 if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
372 timeout = internals->mode4.short_timeout;
374 timeout = internals->mode4.long_timeout;
376 timer_set(&port->current_while_timer, timeout);
377 ACTOR_STATE_CLR(port, EXPIRED);
378 return; /* No state change */
381 /* If CURRENT state timer is not running (stopped or expired)
382 * transit to EXPIRED state from DISABLED or CURRENT */
383 if (!timer_is_running(&port->current_while_timer)) {
384 ACTOR_STATE_SET(port, EXPIRED);
385 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
386 PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
387 timer_set(&port->current_while_timer, internals->mode4.short_timeout);
392 * Function handles periodic tx state machine.
394 * Function implements Periodic Transmission state machine from point 5.4.13
395 * in 802.1AX documentation. It should be called periodically.
397 * @param port Port to handle state machine.
400 periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
/* Works on mode_8023ad_ports[slave_id]. Timeouts are taken from
 * internals->mode4. The function either cancels periodic_timer (no periodic
 * transmission) or re-arms it with the fast or slow periodic timeout, and
 * raises the NTT flag when an LACPDU should be sent. */
402 struct port *port = &mode_8023ad_ports[slave_id];
403 /* Calculate if either side is LACP enabled */
405 uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
406 PARTNER_STATE(port, LACP_ACTIVE);
408 uint8_t is_partner_fast, was_partner_fast;
409 /* No periodic is on BEGIN, LACP DISABLE or when both sides are passive */
410 if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
411 timer_cancel(&port->periodic_timer);
412 timer_force_expired(&port->tx_machine_timer);
413 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
415 MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
416 SM_FLAG(port, BEGIN) ? "begind " : "",
417 SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
418 active ? "LACP active " : "LACP pasive ");
/* Current partner timeout mode versus the mode remembered from the previous
 * run in the PARTNER_SHORT_TIMEOUT state machine flag. */
422 is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
423 was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
425 /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
426 * Other case: check if timer expire or partners settings changed. */
427 if (!timer_is_stopped(&port->periodic_timer)) {
428 if (timer_is_expired(&port->periodic_timer)) {
429 SM_FLAG_SET(port, NTT);
430 } else if (is_partner_fast != was_partner_fast) {
431 /* Partners timeout was slow and now it is fast -> send LACP.
432 * In other case (was fast and now it is slow) just switch
433 * timeout to slow without forcing send of LACP (because standard
 * NOTE(review): the check below raises NTT when the partner is now slow,
 * which reads opposite to the description above - confirm which one is
 * intended.
435 if (!is_partner_fast)
436 SM_FLAG_SET(port, NTT);
438 return; /* Nothing changed */
441 /* Handle state transition to FAST/SLOW LACP timeout */
442 if (is_partner_fast) {
443 timeout = internals->mode4.fast_periodic_timeout;
444 SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
446 timeout = internals->mode4.slow_periodic_timeout;
447 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
450 timer_set(&port->periodic_timer, timeout);
454 * Function handles mux state machine.
456 * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
457 * It should be called periodically.
459 * @param port Port to handle state machine.
462 mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
/* Works on mode_8023ad_ports[slave_id]. The mux state is not stored
 * separately: it is derived from the SYNCHRONIZATION, COLLECTING and
 * DISTRIBUTING bits of the actor state together with wait_while_timer.
 * NTT is raised on every transition that changes the actor state. */
464 struct port *port = &mode_8023ad_ports[slave_id];
466 /* Save current state for later use */
467 const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
470 /* Enter DETACHED state on BEGIN condition or from any other state if
471 * port was unselected */
472 if (SM_FLAG(port, BEGIN) ||
473 port->selected == UNSELECTED || (port->selected == STANDBY &&
474 (port->actor_state & state_mask) != 0)) {
475 /* detach mux from aggregator */
476 port->actor_state &= ~state_mask;
477 /* Set ntt to true if BEGIN condition or transition from any other state
478 * which is indicated that wait_while_timer was started */
479 if (SM_FLAG(port, BEGIN) ||
480 !timer_is_stopped(&port->wait_while_timer)) {
481 SM_FLAG_SET(port, NTT);
482 MODE4_DEBUG("-> DETACHED\n");
484 timer_cancel(&port->wait_while_timer);
/* A stopped wait_while_timer marks the DETACHED state; starting it moves the
 * port to WAITING. */
487 if (timer_is_stopped(&port->wait_while_timer)) {
488 if (port->selected == SELECTED || port->selected == STANDBY) {
489 timer_set(&port->wait_while_timer,
490 internals->mode4.aggregate_wait_timeout);
492 MODE4_DEBUG("DETACHED -> WAITING\n");
494 /* Waiting state entered */
498 /* Transit next state if port is ready */
499 if (!timer_is_expired(&port->wait_while_timer))
502 if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
503 !PARTNER_STATE(port, SYNCHRONIZATION)) {
504 /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
505 * sync transit to ATTACHED state. */
506 ACTOR_STATE_CLR(port, DISTRIBUTING);
507 ACTOR_STATE_CLR(port, COLLECTING);
508 /* Clear actor sync to activate transit ATTACHED in condition below */
509 ACTOR_STATE_CLR(port, SYNCHRONIZATION);
510 MODE4_DEBUG("Out of sync -> ATTACHED\n");
513 if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
514 /* attach mux to aggregator */
515 RTE_ASSERT((port->actor_state & (STATE_COLLECTING |
516 STATE_DISTRIBUTING)) == 0);
518 ACTOR_STATE_SET(port, SYNCHRONIZATION);
519 SM_FLAG_SET(port, NTT);
520 MODE4_DEBUG("ATTACHED Entered\n");
521 } else if (!ACTOR_STATE(port, COLLECTING)) {
522 /* Start collecting if in sync */
523 if (PARTNER_STATE(port, SYNCHRONIZATION)) {
524 MODE4_DEBUG("ATTACHED -> COLLECTING\n");
525 ACTOR_STATE_SET(port, COLLECTING);
526 SM_FLAG_SET(port, NTT);
528 } else if (ACTOR_STATE(port, COLLECTING)) {
529 /* Check if partner is in COLLECTING state. If so this port can
530 * distribute frames to it */
531 if (!ACTOR_STATE(port, DISTRIBUTING)) {
532 if (PARTNER_STATE(port, COLLECTING)) {
533 /* Enable DISTRIBUTING if partner is collecting */
534 ACTOR_STATE_SET(port, DISTRIBUTING);
535 SM_FLAG_SET(port, NTT);
536 MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
538 "Bond %u: slave id %u distributing started.\n",
539 internals->port_id, slave_id);
542 if (!PARTNER_STATE(port, COLLECTING)) {
543 /* Disable DISTRIBUTING (enter COLLECTING state) if partner
544 * is not collecting */
545 ACTOR_STATE_CLR(port, DISTRIBUTING);
546 SM_FLAG_SET(port, NTT);
547 MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
549 "Bond %u: slave id %u distributing stopped.\n",
550 internals->port_id, slave_id);
557 * Function handles transmit state machine.
559 * Function implements Transmit Machine from point 5.4.16 in 802.1AX
565 tx_machine(struct bond_dev_private *internals, uint8_t slave_id)
/* Works on mode_8023ad_ports[slave_id]. When NTT is set and
 * tx_machine_timer has expired, an LACPDU describing the current actor and
 * partner information is built in an mbuf from the port pool and queued on
 * the port tx_ring. After a successful enqueue the timer is re-armed with
 * tx_period_timeout and NTT is cleared. */
567 struct port *agg, *port = &mode_8023ad_ports[slave_id];
569 struct rte_mbuf *lacp_pkt = NULL;
570 struct lacpdu_header *hdr;
571 struct lacpdu *lacpdu;
573 /* If periodic timer is not running periodic machine is in NO PERIODIC and
574 * according to 802.3ax standard tx machine should not transmit any frames
575 * and set ntt to false. */
576 if (timer_is_stopped(&port->periodic_timer))
577 SM_FLAG_CLR(port, NTT);
579 if (!SM_FLAG(port, NTT))
582 if (!timer_is_expired(&port->tx_machine_timer))
585 lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
586 if (lacp_pkt == NULL) {
587 RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
/* The frame consists of exactly one struct lacpdu_header. */
591 lacp_pkt->data_len = sizeof(*hdr);
592 lacp_pkt->pkt_len = sizeof(*hdr);
594 hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
596 /* Source and destination MAC */
597 ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
598 rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
599 hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
601 lacpdu = &hdr->lacpdu;
602 memset(lacpdu, 0, sizeof(*lacpdu));
604 /* Initialize LACP part */
605 lacpdu->subtype = SLOW_SUBTYPE_LACP;
606 lacpdu->version_number = 1;
/* Actor TLV: port parameters of this port, with the system address replaced
 * by the one of the aggregator port it is attached to. */
609 lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
610 lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
611 memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
612 sizeof(port->actor));
613 agg = &mode_8023ad_ports[port->aggregator_port_id];
614 ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
615 lacpdu->actor.state = port->actor_state;
/* Partner TLV: the partner information currently recorded for this port. */
618 lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
619 lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
620 memcpy(&lacpdu->partner.port_params, &port->partner,
621 sizeof(struct port_params));
622 lacpdu->partner.state = port->partner_state;
/* Collector and terminator TLVs. */
625 lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
626 lacpdu->collector_info_length = 0x10;
627 lacpdu->collector_max_delay = 0;
629 lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
630 lacpdu->terminator_length = 0;
632 if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
633 /* If TX ring full, drop packet and free message. Retransmission
634 * will happen in next function call. */
635 rte_pktmbuf_free(lacp_pkt);
636 set_warning_flags(port, WRN_TX_QUEUE_FULL);
640 MODE4_DEBUG("sending LACP frame\n");
641 BOND_PRINT_LACP(lacpdu);
643 timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
644 SM_FLAG_CLR(port, NTT);
648 * Function assigns port to aggregator.
650 * @param bond_dev_private Pointer to bond_dev_private structure.
651 * @param port_pos Port to assign.
654 selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
/* Scans the active slaves for a port that is its own aggregator and whose
 * actor key and partner information match those of
 * mode_8023ad_ports[slave_id]. If the scan runs to the end without stopping,
 * the port becomes its own aggregator. The port is then marked SELECTED.
 * NOTE(review): the parameter names in the comment above do not match the
 * visible signature (internals, slave_id). */
656 struct port *agg, *port;
657 uint8_t slaves_count, new_agg_id, i;
660 slaves = internals->active_slaves;
661 slaves_count = internals->active_slave_count;
662 port = &mode_8023ad_ports[slave_id];
664 /* Search for aggregator suitable for this port */
665 for (i = 0; i < slaves_count; ++i) {
666 agg = &mode_8023ad_ports[slaves[i]];
667 /* Skip ports that are not aggregators */
668 if (agg->aggregator_port_id != slaves[i])
671 /* Actors system ID is not checked since all slave device have the same
672 * ID (MAC address). */
673 if ((agg->actor.key == port->actor.key &&
674 agg->partner.system_priority == port->partner.system_priority &&
675 is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
676 && (agg->partner.key == port->partner.key)) &&
677 is_zero_ether_addr(&port->partner.system) != 1 &&
679 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
685 /* By default, port uses itself as aggregator */
686 if (i == slaves_count)
687 new_agg_id = slave_id;
689 new_agg_id = slaves[i];
691 if (new_agg_id != port->aggregator_port_id) {
692 port->aggregator_port_id = new_agg_id;
694 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
695 "\t%s aggregator ID=%3u\n",
696 port->aggregator_port_id,
697 port->aggregator_port_id == slave_id ?
698 "aggregator not found, using default" : "aggregator found",
699 port->aggregator_port_id);
702 port->selected = SELECTED;
705 /* Function maps DPDK speed to bonding speed stored in key field.
 * Each listed ETH_SPEED_NUM_* value selects the BOND_LINK_SPEED_KEY_*
 * constant of the same speed.
 * NOTE(review): the handling of ETH_SPEED_NUM_NONE and of speeds that are
 * not listed is on lines not visible in this excerpt. */
707 link_speed_key(uint16_t speed) {
711 case ETH_SPEED_NUM_NONE:
714 case ETH_SPEED_NUM_10M:
715 key_speed = BOND_LINK_SPEED_KEY_10M;
717 case ETH_SPEED_NUM_100M:
718 key_speed = BOND_LINK_SPEED_KEY_100M;
720 case ETH_SPEED_NUM_1G:
721 key_speed = BOND_LINK_SPEED_KEY_1000M;
723 case ETH_SPEED_NUM_10G:
724 key_speed = BOND_LINK_SPEED_KEY_10G;
726 case ETH_SPEED_NUM_20G:
727 key_speed = BOND_LINK_SPEED_KEY_20G;
729 case ETH_SPEED_NUM_40G:
730 key_speed = BOND_LINK_SPEED_KEY_40G;
/* Alarm callback that drives mode 4 for one bonded device (arg is the
 * struct rte_eth_dev of the bond). A first pass refreshes the actor key and
 * system address of every active slave from its link state and MAC address;
 * a second pass runs the rx, periodic, mux and tx state machines and the
 * selection logic for every slave. The callback finally re-arms itself with
 * rte_eal_alarm_set() using mode4.update_timeout_us. */
741 bond_mode_8023ad_periodic_cb(void *arg)
743 struct rte_eth_dev *bond_dev = arg;
744 struct bond_dev_private *internals = bond_dev->data->dev_private;
746 struct rte_eth_link link_info;
747 struct ether_addr slave_addr;
753 /* Update link status on each port */
754 for (i = 0; i < internals->active_slave_count; i++) {
757 slave_id = internals->active_slaves[i];
758 rte_eth_link_get(slave_id, &link_info);
759 rte_eth_macaddr_get(slave_id, &slave_addr);
/* Key layout: speed code shifted left by one bit, with
 * BOND_LINK_FULL_DUPLEX_KEY ORed in for full duplex links. */
761 if (link_info.link_status != 0) {
762 key = link_speed_key(link_info.link_speed) << 1;
763 if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
764 key |= BOND_LINK_FULL_DUPLEX_KEY;
768 port = &mode_8023ad_ports[slave_id];
/* port->actor.key holds the key in big-endian byte order. */
770 key = rte_cpu_to_be_16(key);
771 if (key != port->actor.key) {
772 if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
773 set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
775 port->actor.key = key;
776 SM_FLAG_SET(port, NTT);
779 if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
780 ether_addr_copy(&slave_addr, &port->actor.system);
781 if (port->aggregator_port_id == slave_id)
782 SM_FLAG_SET(port, NTT);
/* Second pass: run the state machines of every active slave. */
786 for (i = 0; i < internals->active_slave_count; i++) {
787 slave_id = internals->active_slaves[i];
788 port = &mode_8023ad_ports[slave_id];
790 if ((port->actor.key &
791 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
793 SM_FLAG_SET(port, BEGIN);
795 /* LACP is disabled on half duplex or link is down */
796 if (SM_FLAG(port, LACP_ENABLED)) {
797 /* If port was enabled set it to BEGIN state */
798 SM_FLAG_CLR(port, LACP_ENABLED);
799 ACTOR_STATE_CLR(port, DISTRIBUTING);
800 ACTOR_STATE_CLR(port, COLLECTING);
803 /* Skip this port processing */
807 SM_FLAG_SET(port, LACP_ENABLED);
809 /* Find LACP packet to this port. Do not check subtype, it is done in
810 * function that queued packet */
811 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
812 struct rte_mbuf *lacp_pkt = pkt;
813 struct lacpdu_header *lacp;
815 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
816 RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
818 /* This is LACP frame so pass it to rx_machine */
819 rx_machine(internals, slave_id, &lacp->lacpdu);
820 rte_pktmbuf_free(lacp_pkt);
822 rx_machine(internals, slave_id, NULL);
824 periodic_machine(internals, slave_id);
825 mux_machine(internals, slave_id);
826 tx_machine(internals, slave_id);
827 selection_logic(internals, slave_id);
829 SM_FLAG_CLR(port, BEGIN);
830 show_warnings(slave_id);
/* Schedule the next run. */
833 rte_eal_alarm_set(internals->mode4.update_timeout_us,
834 bond_mode_8023ad_periodic_cb, arg);
/* Prepares mode 4 state for slave_id: resets the actor and partner
 * parameters and state bits, makes the port its own aggregator, enables
 * promiscuous mode on the slave and, when the port has no mbuf pool yet,
 * creates the pool and the rx and tx rings used to exchange slow protocol
 * frames. Failure to create any of these resources ends in rte_panic(). */
838 bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
840 struct bond_dev_private *internals = bond_dev->data->dev_private;
842 struct port *port = &mode_8023ad_ports[slave_id];
843 struct port_params initial = {
845 .system_priority = rte_cpu_to_be_16(0xFFFF),
846 .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
847 .port_priority = rte_cpu_to_be_16(0x00FF),
851 char mem_name[RTE_ETH_NAME_MAX_LEN];
853 unsigned element_size;
854 uint32_t total_tx_desc;
855 struct bond_tx_queue *bd_tx_q;
858 /* Given slave must not be in active list */
859 RTE_ASSERT(find_slave_by_id(internals->active_slaves,
860 internals->active_slave_count, slave_id) == internals->active_slave_count);
861 RTE_SET_USED(internals); /* used only for assert when enabled */
863 memcpy(&port->actor, &initial, sizeof(struct port_params));
864 /* Standard requires that port ID must be greater than 0.
865 * Add 1 to get corresponding port_number */
866 port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1);
868 memcpy(&port->partner, &initial, sizeof(struct port_params));
871 port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
872 port->partner_state = STATE_LACP_ACTIVE;
873 port->sm_flags = SM_FLAGS_BEGIN;
875 /* use this port as aggregator */
876 port->aggregator_port_id = slave_id;
877 rte_eth_promiscuous_enable(slave_id);
879 timer_cancel(&port->warning_timer);
/* NOTE(review): the statement controlled by this check is not visible in
 * this excerpt; presumably the function returns early because the resources
 * below already exist. */
881 if (port->mbuf_pool != NULL)
884 RTE_ASSERT(port->rx_ring == NULL);
885 RTE_ASSERT(port->tx_ring == NULL);
886 socket_id = rte_eth_devices[slave_id].data->numa_node;
/* Each pool element holds the mbuf header, the headroom and one slow
 * protocol frame. */
888 element_size = sizeof(struct slow_protocol_frame) + sizeof(struct rte_mbuf)
889 + RTE_PKTMBUF_HEADROOM;
891 /* The size of the mempool should be at least:
892 * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
893 total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
894 for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
895 bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id];
896 total_tx_desc += bd_tx_q->nb_tx_desc;
899 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
900 port->mbuf_pool = rte_mempool_create(mem_name,
901 total_tx_desc, element_size,
902 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
903 sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
904 NULL, rte_pktmbuf_init, NULL, socket_id, MEMPOOL_F_NO_SPREAD);
906 /* Any memory allocation failure in initialization is critical because
907 * resources cannot be freed, so reinitialization is impossible. */
908 if (port->mbuf_pool == NULL) {
909 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
910 slave_id, mem_name, rte_strerror(rte_errno));
913 snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
914 port->rx_ring = rte_ring_create(mem_name,
915 rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
917 if (port->rx_ring == NULL) {
918 rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
919 mem_name, rte_strerror(rte_errno));
922 /* TX ring is at least one pkt longer to make room for marker packet. */
923 snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
924 port->tx_ring = rte_ring_create(mem_name,
925 rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
927 if (port->tx_ring == NULL) {
928 rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
929 mem_name, rte_strerror(rte_errno));
/* Removes a slave from mode 4 operation: every active port that used it as
 * aggregator is unselected and made its own aggregator, the slave itself is
 * unselected and has actor state bits cleared, and all mbufs still queued on
 * its rx and tx rings are freed.
 * NOTE(review): the remaining parameters and the return statements are on
 * lines not visible in this excerpt. */
934 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
937 struct bond_dev_private *internals = bond_dev->data->dev_private;
942 /* Given slave must be in active list */
943 RTE_ASSERT(find_slave_by_id(internals->active_slaves,
944 internals->active_slave_count, slave_id) < internals->active_slave_count);
946 /* Exclude slave from transmit policy. If this slave is an aggregator
947 * make all aggregated slaves unselected to force selection logic
948 * to select suitable aggregator for this port. */
949 for (i = 0; i < internals->active_slave_count; i++) {
950 port = &mode_8023ad_ports[internals->active_slaves[i]];
951 if (port->aggregator_port_id != slave_id)
954 port->selected = UNSELECTED;
956 /* Use default aggregator */
957 port->aggregator_port_id = internals->active_slaves[i];
960 port = &mode_8023ad_ports[slave_id];
961 port->selected = UNSELECTED;
962 port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
/* Drain both rings so that no mbuf stays queued for the removed slave. */
965 while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
966 rte_pktmbuf_free((struct rte_mbuf *)pkt);
968 while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
969 rte_pktmbuf_free((struct rte_mbuf *)pkt);
/* Re-reads the MAC address of every active slave and stores it as the actor
 * system address when it differs. The periodic callback is stopped for the
 * duration of the update and started again only if the bonded device is
 * started. */
974 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
976 struct bond_dev_private *internals = bond_dev->data->dev_private;
977 struct ether_addr slave_addr;
978 struct port *slave, *agg_slave;
979 uint8_t slave_id, i, j;
981 bond_mode_8023ad_stop(bond_dev);
983 for (i = 0; i < internals->active_slave_count; i++) {
984 slave_id = internals->active_slaves[i];
985 slave = &mode_8023ad_ports[slave_id];
986 rte_eth_macaddr_get(slave_id, &slave_addr);
988 if (is_same_ether_addr(&slave_addr, &slave->actor.system))
991 ether_addr_copy(&slave_addr, &slave->actor.system);
992 /* Do nothing if this port is not an aggregator. In other case
993 * Set NTT flag on every port that use this aggregator. */
994 if (slave->aggregator_port_id != slave_id)
997 for (j = 0; j < internals->active_slave_count; j++) {
998 agg_slave = &mode_8023ad_ports[internals->active_slaves[j]];
999 if (agg_slave->aggregator_port_id == slave_id)
1000 SM_FLAG_SET(agg_slave, NTT);
1004 if (bond_dev->data->dev_started)
1005 bond_mode_8023ad_start(bond_dev);
/* Fills conf with the current mode 4 settings of dev expressed in
 * milliseconds. Values stored in TSC ticks are divided by the number of
 * ticks per millisecond; update_timeout_us is stored in microseconds and is
 * divided by 1000. */
1009 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1010 struct rte_eth_bond_8023ad_conf *conf)
1012 struct bond_dev_private *internals = dev->data->dev_private;
1013 struct mode8023ad_private *mode4 = &internals->mode4;
1014 uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1016 conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1017 conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1018 conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1019 conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1020 conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1021 conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1022 conf->update_timeout_ms = mode4->update_timeout_us / 1000;
1023 conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
/* Stores the mode 4 settings of dev. Millisecond values from conf are
 * converted to TSC ticks, except the update timeout, which is converted to
 * microseconds.
 * NOTE(review): the condition under which the BOND_8023AD_* defaults below
 * are written is on lines not visible in this excerpt; the local def_conf
 * suggests they are used when the caller passes no configuration - confirm. */
1027 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1028 struct rte_eth_bond_8023ad_conf *conf)
1030 struct rte_eth_bond_8023ad_conf def_conf;
1031 struct bond_dev_private *internals = dev->data->dev_private;
1032 struct mode8023ad_private *mode4 = &internals->mode4;
1033 uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1037 conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1038 conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1039 conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1040 conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1041 conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1042 conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1043 conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1044 conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
1047 mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1048 mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1049 mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1050 mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1051 mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1052 mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1053 mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1054 mode4->update_timeout_us = conf->update_timeout_ms * 1000;
/* Activates mode 4 on every slave currently in the active list of bond_dev.
 * bond_mode_8023ad_activate_slave() takes a slave port id (it indexes
 * mode_8023ad_ports and rte_eth_devices with it), so the id stored in
 * active_slaves[] is passed rather than the loop index. */
1058 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1060 struct bond_dev_private *internals = bond_dev->data->dev_private;
1063 for (i = 0; i < internals->active_slave_count; i++)
1064 bond_mode_8023ad_activate_slave(bond_dev, internals->active_slaves[i]);
/* Schedules the first run of bond_mode_8023ad_periodic_cb() for bond_dev
 * after BOND_MODE_8023AX_UPDATE_TIMEOUT_MS milliseconds (passed to the alarm
 * API in microseconds) and returns the result of rte_eal_alarm_set(). */
1070 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1072 return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
1073 &bond_mode_8023ad_periodic_cb, bond_dev);
/* Cancels the pending periodic callback alarm registered for bond_dev. */
1077 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1079 rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
/* Handles one received slow protocol frame for slave_id.
 * Marker frames are answered by turning the received mbuf into a marker
 * response and queueing it on the port tx_ring, rate limited by
 * rx_marker_timer. LACP frames are queued on the port rx_ring for the
 * periodic callback. Any other subtype is reported as unknown. On the
 * failure paths a warning flag is recorded and the packet is freed.
 * NOTE(review): the jumps and returns that connect the branches to the
 * final free are on lines not visible in this excerpt. */
1083 bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
1084 uint8_t slave_id, struct rte_mbuf *pkt)
1086 struct mode8023ad_private *mode4 = &internals->mode4;
1087 struct port *port = &mode_8023ad_ports[slave_id];
1088 struct marker_header *m_hdr;
1089 uint64_t marker_timer, old_marker_timer;
1091 uint8_t wrn, subtype;
1092 /* If packet is a marker, we send response now by reusing given packet
1093 * and update only source MAC, destination MAC is multicast so don't
1094 * update it. Other frames will be handled later by state machines */
1095 subtype = rte_pktmbuf_mtod(pkt,
1096 struct slow_protocol_frame *)->slow_protocol.subtype;
1098 if (subtype == SLOW_SUBTYPE_MARKER) {
1099 m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
1101 if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
1102 wrn = WRN_UNKNOWN_MARKER_TYPE;
1106 /* Setup marker timer. Do it in loop in case concurrent access. */
1108 old_marker_timer = port->rx_marker_timer;
1109 if (!timer_is_expired(&old_marker_timer)) {
1110 wrn = WRN_RX_MARKER_TO_FAST;
1114 timer_set(&marker_timer, mode4->rx_marker_timeout);
1115 retval = rte_atomic64_cmpset(&port->rx_marker_timer,
1116 old_marker_timer, marker_timer);
1117 } while (unlikely(retval == 0));
/* Reuse the received frame as the response: change the TLV type and the
 * source MAC only. */
1119 m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
1120 rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
1122 if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
1124 port->rx_marker_timer = 0;
1125 wrn = WRN_TX_QUEUE_FULL;
1128 } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
1129 if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
1130 /* If RX ring full free lacpdu message and drop packet */
1131 wrn = WRN_RX_QUEUE_FULL;
1135 wrn = WRN_UNKNOWN_SLOW_TYPE;
/* Failure path: record the warning for show_warnings() and drop the packet. */
1142 set_warning_flags(port, wrn);
1143 rte_pktmbuf_free(pkt);
/* Public API: reads the mode 4 configuration of bonded port port_id into
 * conf after validating the port id with valid_bonded_port_id().
 * NOTE(review): the return statements and any further argument checks are on
 * lines not visible in this excerpt. */
1147 rte_eth_bond_8023ad_conf_get(uint8_t port_id,
1148 struct rte_eth_bond_8023ad_conf *conf)
1150 struct rte_eth_dev *bond_dev;
1152 if (valid_bonded_port_id(port_id) != 0)
1158 bond_dev = &rte_eth_devices[port_id];
1159 bond_mode_8023ad_conf_get(bond_dev, conf);
/* Public API: validates conf and applies it to bonded port port_id through
 * bond_mode_8023ad_setup(). The check rejects a configuration in which a
 * period is zero, the fast period is not below the slow period, or the short
 * timeout is not below the long timeout.
 * NOTE(review): the return statements and the condition enclosing the sanity
 * check are on lines not visible in this excerpt. */
1164 rte_eth_bond_8023ad_setup(uint8_t port_id,
1165 struct rte_eth_bond_8023ad_conf *conf)
1167 struct rte_eth_dev *bond_dev;
1169 if (valid_bonded_port_id(port_id) != 0)
1173 /* Basic sanity check */
1174 if (conf->slow_periodic_ms == 0 ||
1175 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1176 conf->long_timeout_ms == 0 ||
1177 conf->short_timeout_ms >= conf->long_timeout_ms ||
1178 conf->aggregate_wait_timeout_ms == 0 ||
1179 conf->tx_period_ms == 0 ||
1180 conf->rx_marker_period_ms == 0 ||
1181 conf->update_timeout_ms == 0) {
1182 RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
1187 bond_dev = &rte_eth_devices[port_id];
1188 bond_mode_8023ad_setup(bond_dev, conf);
/* Public API: copies the mode 4 state of an active slave of bonded port
 * port_id into info (selection status, actor and partner state bytes and
 * parameters, aggregator port id). The checks cover a NULL info pointer, an
 * invalid bonded port id, a bonding mode other than BONDING_MODE_8023AD and
 * a slave that is not in the active list.
 * NOTE(review): the values returned on these paths are on lines not visible
 * in this excerpt. */
1194 rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
1195 struct rte_eth_bond_8023ad_slave_info *info)
1197 struct rte_eth_dev *bond_dev;
1198 struct bond_dev_private *internals;
1201 if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1202 rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1205 bond_dev = &rte_eth_devices[port_id];
1207 internals = bond_dev->data->dev_private;
1208 if (find_slave_by_id(internals->active_slaves,
1209 internals->active_slave_count, slave_id) ==
1210 internals->active_slave_count)
1213 port = &mode_8023ad_ports[slave_id];
1214 info->selected = port->selected;
1216 info->actor_state = port->actor_state;
1217 rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1219 info->partner_state = port->partner_state;
1220 rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1222 info->agg_port_id = port->aggregator_port_id;