net/bonding: fix selection logic
[dpdk.git] / drivers / net / bonding / rte_eth_bond_8023ad.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <stddef.h>
6 #include <string.h>
7 #include <stdbool.h>
8
9 #include <rte_alarm.h>
10 #include <rte_malloc.h>
11 #include <rte_errno.h>
12 #include <rte_cycles.h>
13 #include <rte_compat.h>
14
15 #include "eth_bond_private.h"
16
17 static void bond_mode_8023ad_ext_periodic_cb(void *arg);
18 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
19
/*
 * Debug trace helper used by the mode 4 state machines.
 *
 * Every message is prefixed with the value returned by
 * bond_dbg_get_time_diff_ms(), a port id and the name of the calling
 * function. The expansion references a bare `slave_id` identifier, so the
 * macro can only be used where a variable with that name is in scope.
 */
#define MODE4_DEBUG(fmt, ...)                           \
        rte_log(RTE_LOG_DEBUG, bond_logtype,            \
                "%6u [Port %u: %s] " fmt,               \
                bond_dbg_get_time_diff_ms(), slave_id,  \
                __func__, ##__VA_ARGS__)
25
/* TSC value latched by the first debug trace; 0 means "not latched yet". */
static uint64_t start_time;

/**
 * Return the time elapsed since the first call of this function.
 *
 * The first call latches the current TSC value as the reference point (and
 * therefore reports 0); later calls convert the TSC delta to milliseconds
 * using rte_get_tsc_hz().
 *
 * @return
 *   Elapsed milliseconds, narrowed to unsigned.
 */
static unsigned
bond_dbg_get_time_diff_ms(void)
{
        const uint64_t tsc_now = rte_rdtsc();
        uint64_t elapsed_cycles;

        if (!start_time)
                start_time = tsc_now;

        elapsed_cycles = tsc_now - start_time;

        return (elapsed_cycles * 1000) / rte_get_tsc_hz();
}
39
40 static void
41 bond_print_lacp(struct lacpdu *l)
42 {
43         char a_address[18];
44         char p_address[18];
45         char a_state[256] = { 0 };
46         char p_state[256] = { 0 };
47
48         static const char * const state_labels[] = {
49                 "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
50         };
51
52         int a_len = 0;
53         int p_len = 0;
54         uint8_t i;
55         uint8_t *addr;
56
57         addr = l->actor.port_params.system.addr_bytes;
58         snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
59                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
60
61         addr = l->partner.port_params.system.addr_bytes;
62         snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
63                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
64
65         for (i = 0; i < 8; i++) {
66                 if ((l->actor.state >> i) & 1) {
67                         a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
68                                 state_labels[i]);
69                 }
70
71                 if ((l->partner.state >> i) & 1) {
72                         p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
73                                 state_labels[i]);
74                 }
75         }
76
77         if (a_len && a_state[a_len-1] == ' ')
78                 a_state[a_len-1] = '\0';
79
80         if (p_len && p_state[p_len-1] == ' ')
81                 p_state[p_len-1] = '\0';
82
83         RTE_BOND_LOG(DEBUG,
84                      "LACP: {\n"
85                      "  subtype= %02X\n"
86                      "  ver_num=%02X\n"
87                      "  actor={ tlv=%02X, len=%02X\n"
88                      "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
89                      "       state={ %s }\n"
90                      "  }\n"
91                      "  partner={ tlv=%02X, len=%02X\n"
92                      "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
93                      "       state={ %s }\n"
94                      "  }\n"
95                      "  collector={info=%02X, length=%02X, max_delay=%04X\n, "
96                      "type_term=%02X, terminator_length = %02X }",
97                      l->subtype,
98                      l->version_number,
99                      l->actor.tlv_type_info,
100                      l->actor.info_length,
101                      l->actor.port_params.system_priority,
102                      a_address,
103                      l->actor.port_params.key,
104                      l->actor.port_params.port_priority,
105                      l->actor.port_params.port_number,
106                      a_state,
107                      l->partner.tlv_type_info,
108                      l->partner.info_length,
109                      l->partner.port_params.system_priority,
110                      p_address,
111                      l->partner.port_params.key,
112                      l->partner.port_params.port_priority,
113                      l->partner.port_params.port_number,
114                      p_state,
115                      l->tlv_type_collector_info,
116                      l->collector_info_length,
117                      l->collector_max_delay,
118                      l->tlv_type_terminator,
119                      l->terminator_length);
120
121 }
122
/* Debug build: LACPDU dumps are routed to bond_print_lacp(). */
#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
#else
/*
 * RTE_LIBRTE_BOND_DEBUG_8023AD is not defined: both debug helpers expand to
 * empty statements, so their arguments are never evaluated.
 */
#define BOND_PRINT_LACP(lacpdu) do { } while (0)
#define MODE4_DEBUG(fmt, ...) do { } while (0)
#endif
128
/*
 * Destination MAC address (01:80:C2:00:00:02) that tx_machine() copies into
 * the Ethernet header of every LACPDU it builds.
 */
static const struct rte_ether_addr lacp_mac_addr = {
        .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
};
132
/*
 * Mode 4 state of every port. The state machines in this file index the
 * table with a slave port id or with an aggregator port id.
 */
struct port bond_mode_8023ad_ports[RTE_MAX_ETHPORTS];
134
/* Stops the given timer by storing 0, the value timer_is_stopped() tests. */
static void
timer_cancel(uint64_t *timer)
{
        *timer = UINT64_C(0);
}
140
/*
 * Arms the given timer: the stored deadline is the current TSC value plus
 * timeout, so timeout is expressed in TSC cycles.
 */
static void
timer_set(uint64_t *timer, uint64_t timeout)
{
        const uint64_t deadline = rte_rdtsc() + timeout;

        *timer = deadline;
}
146
/*
 * Puts the given timer into the expired state by using the current TSC value
 * as its deadline.
 */
static void
timer_force_expired(uint64_t *timer)
{
        const uint64_t now = rte_rdtsc();

        *timer = now;
}
153
/* Tells whether the given timer is stopped, i.e. holds the value 0. */
static bool
timer_is_stopped(uint64_t *timer)
{
        if (*timer != 0)
                return false;

        return true;
}
159
/*
 * Tells whether the deadline stored in the given timer lies strictly before
 * the current TSC value. A stopped timer (value 0) also reports expired.
 */
static bool
timer_is_expired(uint64_t *timer)
{
        return rte_rdtsc() > *timer;
}
165
/* A timer is running when it has been armed and its deadline is not passed. */
static bool
timer_is_running(uint64_t *timer)
{
        if (timer_is_stopped(timer))
                return false;

        return !timer_is_expired(timer);
}
172
173 static void
174 set_warning_flags(struct port *port, uint16_t flags)
175 {
176         int retval;
177         uint16_t old;
178         uint16_t new_flag = 0;
179
180         do {
181                 old = port->warnings_to_show;
182                 new_flag = old | flags;
183                 retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
184         } while (unlikely(retval == 0));
185 }
186
187 static void
188 show_warnings(uint16_t slave_id)
189 {
190         struct port *port = &bond_mode_8023ad_ports[slave_id];
191         uint8_t warnings;
192
193         do {
194                 warnings = port->warnings_to_show;
195         } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
196
197         if (!warnings)
198                 return;
199
200         if (!timer_is_expired(&port->warning_timer))
201                 return;
202
203
204         timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
205                         rte_get_tsc_hz() / 1000);
206
207         if (warnings & WRN_RX_QUEUE_FULL) {
208                 RTE_BOND_LOG(DEBUG,
209                              "Slave %u: failed to enqueue LACP packet into RX ring.\n"
210                              "Receive and transmit functions must be invoked on bonded"
211                              "interface at least 10 times per second or LACP will notwork correctly",
212                              slave_id);
213         }
214
215         if (warnings & WRN_TX_QUEUE_FULL) {
216                 RTE_BOND_LOG(DEBUG,
217                              "Slave %u: failed to enqueue LACP packet into TX ring.\n"
218                              "Receive and transmit functions must be invoked on bonded"
219                              "interface at least 10 times per second or LACP will not work correctly",
220                              slave_id);
221         }
222
223         if (warnings & WRN_RX_MARKER_TO_FAST)
224                 RTE_BOND_LOG(INFO, "Slave %u: marker to early - ignoring.",
225                              slave_id);
226
227         if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
228                 RTE_BOND_LOG(INFO,
229                         "Slave %u: ignoring unknown slow protocol frame type",
230                              slave_id);
231         }
232
233         if (warnings & WRN_UNKNOWN_MARKER_TYPE)
234                 RTE_BOND_LOG(INFO, "Slave %u: ignoring unknown marker type",
235                              slave_id);
236
237         if (warnings & WRN_NOT_LACP_CAPABLE)
238                 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
239 }
240
/* Put the partner information of the given port into the defaulted state. */
static void
record_default(struct port *port)
{
        /* Record default parameters for partner. Partner admin parameters
         * are not implemented, so the partner parameters themselves are left
         * at their last known values: only the partner state is reset to
         * "LACP active" and the actor is marked as running with a defaulted
         * partner. */
        port->partner_state = STATE_LACP_ACTIVE;
        ACTOR_STATE_SET(port, DEFAULTED);
}
250
251 /** Function handles rx state machine.
252  *
253  * This function implements Receive State Machine from point 5.4.12 in
254  * 802.1AX documentation. It should be called periodically.
255  *
256  * @param lacpdu                LACPDU received.
257  * @param port                  Port on which LACPDU was received.
258  */
259 static void
260 rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
261                 struct lacpdu *lacp)
262 {
263         struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
264         uint64_t timeout;
265
266         if (SM_FLAG(port, BEGIN)) {
267                 /* Initialize stuff */
268                 MODE4_DEBUG("-> INITIALIZE\n");
269                 SM_FLAG_CLR(port, MOVED);
270                 port->selected = UNSELECTED;
271
272                 record_default(port);
273
274                 ACTOR_STATE_CLR(port, EXPIRED);
275                 timer_cancel(&port->current_while_timer);
276
277                 /* DISABLED: On initialization partner is out of sync */
278                 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
279
280                 /* LACP DISABLED stuff if LACP not enabled on this port */
281                 if (!SM_FLAG(port, LACP_ENABLED))
282                         PARTNER_STATE_CLR(port, AGGREGATION);
283                 else
284                         PARTNER_STATE_SET(port, AGGREGATION);
285         }
286
287         if (!SM_FLAG(port, LACP_ENABLED)) {
288                 /* Update parameters only if state changed */
289                 if (!timer_is_stopped(&port->current_while_timer)) {
290                         port->selected = UNSELECTED;
291                         record_default(port);
292                         PARTNER_STATE_CLR(port, AGGREGATION);
293                         ACTOR_STATE_CLR(port, EXPIRED);
294                         timer_cancel(&port->current_while_timer);
295                 }
296                 return;
297         }
298
299         if (lacp) {
300                 MODE4_DEBUG("LACP -> CURRENT\n");
301                 BOND_PRINT_LACP(lacp);
302                 /* Update selected flag. If partner parameters are defaulted assume they
303                  * are match. If not defaulted  compare LACP actor with ports parner
304                  * params. */
305                 if (!ACTOR_STATE(port, DEFAULTED) &&
306                         (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
307                         || memcmp(&port->partner, &lacp->actor.port_params,
308                                 sizeof(port->partner)) != 0)) {
309                         MODE4_DEBUG("selected <- UNSELECTED\n");
310                         port->selected = UNSELECTED;
311                 }
312
313                 /* Record this PDU actor params as partner params */
314                 memcpy(&port->partner, &lacp->actor.port_params,
315                         sizeof(struct port_params));
316                 port->partner_state = lacp->actor.state;
317
318                 /* Partner parameters are not defaulted any more */
319                 ACTOR_STATE_CLR(port, DEFAULTED);
320
321                 /* If LACP partner params match this port actor params */
322                 agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
323                 bool match = port->actor.system_priority ==
324                         lacp->partner.port_params.system_priority &&
325                         rte_is_same_ether_addr(&agg->actor.system,
326                         &lacp->partner.port_params.system) &&
327                         port->actor.port_priority ==
328                         lacp->partner.port_params.port_priority &&
329                         port->actor.port_number ==
330                         lacp->partner.port_params.port_number;
331
332                 /* Update NTT if partners information are outdated (xored and masked
333                  * bits are set)*/
334                 uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
335                         STATE_SYNCHRONIZATION | STATE_AGGREGATION;
336
337                 if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
338                                 match == false) {
339                         SM_FLAG_SET(port, NTT);
340                 }
341
342                 /* If LACP partner params match this port actor params */
343                 if (match == true && ACTOR_STATE(port, AGGREGATION) ==
344                                 PARTNER_STATE(port,     AGGREGATION))
345                         PARTNER_STATE_SET(port, SYNCHRONIZATION);
346                 else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
347                                 AGGREGATION))
348                         PARTNER_STATE_SET(port, SYNCHRONIZATION);
349                 else
350                         PARTNER_STATE_CLR(port, SYNCHRONIZATION);
351
352                 if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
353                         timeout = internals->mode4.short_timeout;
354                 else
355                         timeout = internals->mode4.long_timeout;
356
357                 timer_set(&port->current_while_timer, timeout);
358                 ACTOR_STATE_CLR(port, EXPIRED);
359                 return; /* No state change */
360         }
361
362         /* If CURRENT state timer is not running (stopped or expired)
363          * transit to EXPIRED state from DISABLED or CURRENT */
364         if (!timer_is_running(&port->current_while_timer)) {
365                 ACTOR_STATE_SET(port, EXPIRED);
366                 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
367                 PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
368                 timer_set(&port->current_while_timer, internals->mode4.short_timeout);
369         }
370 }
371
372 /**
373  * Function handles periodic tx state machine.
374  *
375  * Function implements Periodic Transmission state machine from point 5.4.13
376  * in 802.1AX documentation. It should be called periodically.
377  *
378  * @param port                  Port to handle state machine.
379  */
380 static void
381 periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
382 {
383         struct port *port = &bond_mode_8023ad_ports[slave_id];
384         /* Calculate if either site is LACP enabled */
385         uint64_t timeout;
386         uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
387                 PARTNER_STATE(port, LACP_ACTIVE);
388
389         uint8_t is_partner_fast, was_partner_fast;
390         /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */
391         if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
392                 timer_cancel(&port->periodic_timer);
393                 timer_force_expired(&port->tx_machine_timer);
394                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
395
396                 MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
397                         SM_FLAG(port, BEGIN) ? "begind " : "",
398                         SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
399                         active ? "LACP active " : "LACP pasive ");
400                 return;
401         }
402
403         is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
404         was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
405
406         /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
407          * Other case: check if timer expire or partners settings changed. */
408         if (!timer_is_stopped(&port->periodic_timer)) {
409                 if (timer_is_expired(&port->periodic_timer)) {
410                         SM_FLAG_SET(port, NTT);
411                 } else if (is_partner_fast != was_partner_fast) {
412                         /* Partners timeout  was slow and now it is fast -> send LACP.
413                          * In other case (was fast and now it is slow) just switch
414                          * timeout to slow without forcing send of LACP (because standard
415                          * say so)*/
416                         if (is_partner_fast)
417                                 SM_FLAG_SET(port, NTT);
418                 } else
419                         return; /* Nothing changed */
420         }
421
422         /* Handle state transition to FAST/SLOW LACP timeout */
423         if (is_partner_fast) {
424                 timeout = internals->mode4.fast_periodic_timeout;
425                 SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
426         } else {
427                 timeout = internals->mode4.slow_periodic_timeout;
428                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
429         }
430
431         timer_set(&port->periodic_timer, timeout);
432 }
433
434 /**
435  * Function handles mux state machine.
436  *
437  * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
438  * It should be called periodically.
439  *
440  * @param port                  Port to handle state machine.
441  */
442 static void
443 mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
444 {
445         struct port *port = &bond_mode_8023ad_ports[slave_id];
446
447         /* Save current state for later use */
448         const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
449                 STATE_COLLECTING;
450
451         /* Enter DETACHED state on BEGIN condition or from any other state if
452          * port was unselected */
453         if (SM_FLAG(port, BEGIN) ||
454                         port->selected == UNSELECTED || (port->selected == STANDBY &&
455                                 (port->actor_state & state_mask) != 0)) {
456                 /* detach mux from aggregator */
457                 port->actor_state &= ~state_mask;
458                 /* Set ntt to true if BEGIN condition or transition from any other state
459                  * which is indicated that wait_while_timer was started */
460                 if (SM_FLAG(port, BEGIN) ||
461                                 !timer_is_stopped(&port->wait_while_timer)) {
462                         SM_FLAG_SET(port, NTT);
463                         MODE4_DEBUG("-> DETACHED\n");
464                 }
465                 timer_cancel(&port->wait_while_timer);
466         }
467
468         if (timer_is_stopped(&port->wait_while_timer)) {
469                 if (port->selected == SELECTED || port->selected == STANDBY) {
470                         timer_set(&port->wait_while_timer,
471                                 internals->mode4.aggregate_wait_timeout);
472
473                         MODE4_DEBUG("DETACHED -> WAITING\n");
474                 }
475                 /* Waiting state entered */
476                 return;
477         }
478
479         /* Transit next state if port is ready */
480         if (!timer_is_expired(&port->wait_while_timer))
481                 return;
482
483         if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
484                 !PARTNER_STATE(port, SYNCHRONIZATION)) {
485                 /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
486                  * sync transit to ATACHED state.  */
487                 ACTOR_STATE_CLR(port, DISTRIBUTING);
488                 ACTOR_STATE_CLR(port, COLLECTING);
489                 /* Clear actor sync to activate transit ATACHED in condition bellow */
490                 ACTOR_STATE_CLR(port, SYNCHRONIZATION);
491                 MODE4_DEBUG("Out of sync -> ATTACHED\n");
492         }
493
494         if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
495                 /* attach mux to aggregator */
496                 RTE_ASSERT((port->actor_state & (STATE_COLLECTING |
497                         STATE_DISTRIBUTING)) == 0);
498
499                 ACTOR_STATE_SET(port, SYNCHRONIZATION);
500                 SM_FLAG_SET(port, NTT);
501                 MODE4_DEBUG("ATTACHED Entered\n");
502         } else if (!ACTOR_STATE(port, COLLECTING)) {
503                 /* Start collecting if in sync */
504                 if (PARTNER_STATE(port, SYNCHRONIZATION)) {
505                         MODE4_DEBUG("ATTACHED -> COLLECTING\n");
506                         ACTOR_STATE_SET(port, COLLECTING);
507                         SM_FLAG_SET(port, NTT);
508                 }
509         } else if (ACTOR_STATE(port, COLLECTING)) {
510                 /* Check if partner is in COLLECTING state. If so this port can
511                  * distribute frames to it */
512                 if (!ACTOR_STATE(port, DISTRIBUTING)) {
513                         if (PARTNER_STATE(port, COLLECTING)) {
514                                 /* Enable  DISTRIBUTING if partner is collecting */
515                                 ACTOR_STATE_SET(port, DISTRIBUTING);
516                                 SM_FLAG_SET(port, NTT);
517                                 MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
518                                 RTE_BOND_LOG(INFO,
519                                         "Bond %u: slave id %u distributing started.",
520                                         internals->port_id, slave_id);
521                         }
522                 } else {
523                         if (!PARTNER_STATE(port, COLLECTING)) {
524                                 /* Disable DISTRIBUTING (enter COLLECTING state) if partner
525                                  * is not collecting */
526                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
527                                 SM_FLAG_SET(port, NTT);
528                                 MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
529                                 RTE_BOND_LOG(INFO,
530                                         "Bond %u: slave id %u distributing stopped.",
531                                         internals->port_id, slave_id);
532                         }
533                 }
534         }
535 }
536
537 /**
538  * Function handles transmit state machine.
539  *
540  * Function implements Transmit Machine from point 5.4.16 in 802.1AX
541  * documentation.
542  *
543  * @param port
544  */
545 static void
546 tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
547 {
548         struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
549
550         struct rte_mbuf *lacp_pkt = NULL;
551         struct lacpdu_header *hdr;
552         struct lacpdu *lacpdu;
553
554         /* If periodic timer is not running periodic machine is in NO PERIODIC and
555          * according to 802.3ax standard tx machine should not transmit any frames
556          * and set ntt to false. */
557         if (timer_is_stopped(&port->periodic_timer))
558                 SM_FLAG_CLR(port, NTT);
559
560         if (!SM_FLAG(port, NTT))
561                 return;
562
563         if (!timer_is_expired(&port->tx_machine_timer))
564                 return;
565
566         lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
567         if (lacp_pkt == NULL) {
568                 RTE_BOND_LOG(ERR, "Failed to allocate LACP packet from pool");
569                 return;
570         }
571
572         lacp_pkt->data_len = sizeof(*hdr);
573         lacp_pkt->pkt_len = sizeof(*hdr);
574
575         hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
576
577         /* Source and destination MAC */
578         rte_ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
579         rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
580         hdr->eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_SLOW);
581
582         lacpdu = &hdr->lacpdu;
583         memset(lacpdu, 0, sizeof(*lacpdu));
584
585         /* Initialize LACP part */
586         lacpdu->subtype = SLOW_SUBTYPE_LACP;
587         lacpdu->version_number = 1;
588
589         /* ACTOR */
590         lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
591         lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
592         memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
593                         sizeof(port->actor));
594         agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
595         rte_ether_addr_copy(&agg->actor.system,
596                         &hdr->lacpdu.actor.port_params.system);
597         lacpdu->actor.state = port->actor_state;
598
599         /* PARTNER */
600         lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
601         lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
602         memcpy(&lacpdu->partner.port_params, &port->partner,
603                         sizeof(struct port_params));
604         lacpdu->partner.state = port->partner_state;
605
606         /* Other fields */
607         lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
608         lacpdu->collector_info_length = 0x10;
609         lacpdu->collector_max_delay = 0;
610
611         lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
612         lacpdu->terminator_length = 0;
613
614         MODE4_DEBUG("Sending LACP frame\n");
615         BOND_PRINT_LACP(lacpdu);
616
617         if (internals->mode4.dedicated_queues.enabled == 0) {
618                 int retval = rte_ring_enqueue(port->tx_ring, lacp_pkt);
619                 if (retval != 0) {
620                         /* If TX ring full, drop packet and free message.
621                            Retransmission will happen in next function call. */
622                         rte_pktmbuf_free(lacp_pkt);
623                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
624                         return;
625                 }
626         } else {
627                 uint16_t pkts_sent = rte_eth_tx_burst(slave_id,
628                                 internals->mode4.dedicated_queues.tx_qid,
629                                 &lacp_pkt, 1);
630                 if (pkts_sent != 1) {
631                         rte_pktmbuf_free(lacp_pkt);
632                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
633                         return;
634                 }
635         }
636
637
638         timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
639         SM_FLAG_CLR(port, NTT);
640 }
641
/**
 * Find the position of the largest element of an array.
 *
 * @param a
 *   Array to scan.
 * @param n
 *   Number of elements in a.
 * @return
 *   Index of the first occurrence of the maximum, or (uint16_t)-1 when n is
 *   not positive.
 */
static uint16_t
max_index(uint64_t *a, int n)
{
        int best = 0;
        int pos;

        if (n <= 0)
                return (uint16_t)-1;

        /* Strict comparison keeps the earliest index among equal values */
        for (pos = 1; pos < n; pos++) {
                if (a[pos] > a[best])
                        best = pos;
        }

        return (uint16_t)best;
}
660
661 /**
662  * Function assigns port to aggregator.
663  *
664  * @param bond_dev_private      Pointer to bond_dev_private structure.
665  * @param port_pos                      Port to assign.
666  */
667 static void
668 selection_logic(struct bond_dev_private *internals, uint16_t slave_id)
669 {
670         struct port *agg, *port;
671         uint16_t slaves_count, new_agg_id, i, j = 0;
672         uint16_t *slaves;
673         uint64_t agg_bandwidth[RTE_MAX_ETHPORTS] = {0};
674         uint64_t agg_count[RTE_MAX_ETHPORTS] = {0};
675         uint16_t default_slave = 0;
676         struct rte_eth_link link_info;
677         uint16_t agg_new_idx = 0;
678         int ret;
679
680         slaves = internals->active_slaves;
681         slaves_count = internals->active_slave_count;
682         port = &bond_mode_8023ad_ports[slave_id];
683
684         /* Search for aggregator suitable for this port */
685         for (i = 0; i < slaves_count; ++i) {
686                 agg = &bond_mode_8023ad_ports[slaves[i]];
687                 /* Skip ports that are not aggreagators */
688                 if (agg->aggregator_port_id != slaves[i])
689                         continue;
690
691                 ret = rte_eth_link_get_nowait(slaves[i], &link_info);
692                 if (ret < 0) {
693                         RTE_BOND_LOG(ERR,
694                                 "Slave (port %u) link get failed: %s\n",
695                                 slaves[i], rte_strerror(-ret));
696                         continue;
697                 }
698                 agg_count[i] += 1;
699                 agg_bandwidth[i] += link_info.link_speed;
700
701                 /* Actors system ID is not checked since all slave device have the same
702                  * ID (MAC address). */
703                 if ((agg->actor.key == port->actor.key &&
704                         agg->partner.system_priority == port->partner.system_priority &&
705                         rte_is_same_ether_addr(&agg->partner.system,
706                                         &port->partner.system) == 1
707                         && (agg->partner.key == port->partner.key)) &&
708                         rte_is_zero_ether_addr(&port->partner.system) != 1 &&
709                         (agg->actor.key &
710                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
711
712                         if (j == 0)
713                                 default_slave = i;
714                         j++;
715                 }
716         }
717
718         switch (internals->mode4.agg_selection) {
719         case AGG_COUNT:
720                 agg_new_idx = max_index(agg_count, slaves_count);
721                 new_agg_id = slaves[agg_new_idx];
722                 break;
723         case AGG_BANDWIDTH:
724                 agg_new_idx = max_index(agg_bandwidth, slaves_count);
725                 new_agg_id = slaves[agg_new_idx];
726                 break;
727         case AGG_STABLE:
728                 if (default_slave == slaves_count)
729                         new_agg_id = slaves[slave_id];
730                 else
731                         new_agg_id = slaves[default_slave];
732                 break;
733         default:
734                 if (default_slave == slaves_count)
735                         new_agg_id = slaves[slave_id];
736                 else
737                         new_agg_id = slaves[default_slave];
738                 break;
739         }
740
741         if (new_agg_id != port->aggregator_port_id) {
742                 port->aggregator_port_id = new_agg_id;
743
744                 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
745                         "\t%s aggregator ID=%3u\n",
746                         port->aggregator_port_id,
747                         port->aggregator_port_id == slave_id ?
748                                 "aggregator not found, using default" : "aggregator found",
749                         port->aggregator_port_id);
750         }
751
752         port->selected = SELECTED;
753 }
754
755 /* Function maps DPDK speed to bonding speed stored in key field */
756 static uint16_t
757 link_speed_key(uint16_t speed) {
758         uint16_t key_speed;
759
760         switch (speed) {
761         case ETH_SPEED_NUM_NONE:
762                 key_speed = 0x00;
763                 break;
764         case ETH_SPEED_NUM_10M:
765                 key_speed = BOND_LINK_SPEED_KEY_10M;
766                 break;
767         case ETH_SPEED_NUM_100M:
768                 key_speed = BOND_LINK_SPEED_KEY_100M;
769                 break;
770         case ETH_SPEED_NUM_1G:
771                 key_speed = BOND_LINK_SPEED_KEY_1000M;
772                 break;
773         case ETH_SPEED_NUM_10G:
774                 key_speed = BOND_LINK_SPEED_KEY_10G;
775                 break;
776         case ETH_SPEED_NUM_20G:
777                 key_speed = BOND_LINK_SPEED_KEY_20G;
778                 break;
779         case ETH_SPEED_NUM_40G:
780                 key_speed = BOND_LINK_SPEED_KEY_40G;
781                 break;
782         default:
783                 /* Unknown speed*/
784                 key_speed = 0xFFFF;
785         }
786
787         return key_speed;
788 }
789
790 static void
791 rx_machine_update(struct bond_dev_private *internals, uint16_t slave_id,
792                 struct rte_mbuf *lacp_pkt) {
793         struct lacpdu_header *lacp;
794         struct lacpdu_actor_partner_params *partner;
795
796         if (lacp_pkt != NULL) {
797                 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
798                 RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
799
800                 partner = &lacp->lacpdu.partner;
801                 if (rte_is_same_ether_addr(&partner->port_params.system,
802                         &internals->mode4.mac_addr)) {
803                         /* This LACP frame is sending to the bonding port
804                          * so pass it to rx_machine.
805                          */
806                         rx_machine(internals, slave_id, &lacp->lacpdu);
807                 }
808                 rte_pktmbuf_free(lacp_pkt);
809         } else
810                 rx_machine(internals, slave_id, NULL);
811 }
812
813 static void
814 bond_mode_8023ad_periodic_cb(void *arg)
815 {
816         struct rte_eth_dev *bond_dev = arg;
817         struct bond_dev_private *internals = bond_dev->data->dev_private;
818         struct port *port;
819         struct rte_eth_link link_info;
820         struct rte_ether_addr slave_addr;
821         struct rte_mbuf *lacp_pkt = NULL;
822         uint16_t slave_id;
823         uint16_t i;
824
825
826         /* Update link status on each port */
827         for (i = 0; i < internals->active_slave_count; i++) {
828                 uint16_t key;
829                 int ret;
830
831                 slave_id = internals->active_slaves[i];
832                 ret = rte_eth_link_get_nowait(slave_id, &link_info);
833                 if (ret < 0) {
834                         RTE_BOND_LOG(ERR,
835                                 "Slave (port %u) link get failed: %s\n",
836                                 slave_id, rte_strerror(-ret));
837                 }
838
839                 if (ret >= 0 && link_info.link_status != 0) {
840                         key = link_speed_key(link_info.link_speed) << 1;
841                         if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
842                                 key |= BOND_LINK_FULL_DUPLEX_KEY;
843                 } else {
844                         key = 0;
845                 }
846
847                 rte_eth_macaddr_get(slave_id, &slave_addr);
848                 port = &bond_mode_8023ad_ports[slave_id];
849
850                 key = rte_cpu_to_be_16(key);
851                 if (key != port->actor.key) {
852                         if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
853                                 set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
854
855                         port->actor.key = key;
856                         SM_FLAG_SET(port, NTT);
857                 }
858
859                 if (!rte_is_same_ether_addr(&port->actor.system, &slave_addr)) {
860                         rte_ether_addr_copy(&slave_addr, &port->actor.system);
861                         if (port->aggregator_port_id == slave_id)
862                                 SM_FLAG_SET(port, NTT);
863                 }
864         }
865
866         for (i = 0; i < internals->active_slave_count; i++) {
867                 slave_id = internals->active_slaves[i];
868                 port = &bond_mode_8023ad_ports[slave_id];
869
870                 if ((port->actor.key &
871                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
872
873                         SM_FLAG_SET(port, BEGIN);
874
875                         /* LACP is disabled on half duples or link is down */
876                         if (SM_FLAG(port, LACP_ENABLED)) {
877                                 /* If port was enabled set it to BEGIN state */
878                                 SM_FLAG_CLR(port, LACP_ENABLED);
879                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
880                                 ACTOR_STATE_CLR(port, COLLECTING);
881                         }
882
883                         /* Skip this port processing */
884                         continue;
885                 }
886
887                 SM_FLAG_SET(port, LACP_ENABLED);
888
889                 if (internals->mode4.dedicated_queues.enabled == 0) {
890                         /* Find LACP packet to this port. Do not check subtype,
891                          * it is done in function that queued packet
892                          */
893                         int retval = rte_ring_dequeue(port->rx_ring,
894                                         (void **)&lacp_pkt);
895
896                         if (retval != 0)
897                                 lacp_pkt = NULL;
898
899                         rx_machine_update(internals, slave_id, lacp_pkt);
900                 } else {
901                         uint16_t rx_count = rte_eth_rx_burst(slave_id,
902                                         internals->mode4.dedicated_queues.rx_qid,
903                                         &lacp_pkt, 1);
904
905                         if (rx_count == 1)
906                                 bond_mode_8023ad_handle_slow_pkt(internals,
907                                                 slave_id, lacp_pkt);
908                         else
909                                 rx_machine_update(internals, slave_id, NULL);
910                 }
911
912                 periodic_machine(internals, slave_id);
913                 mux_machine(internals, slave_id);
914                 tx_machine(internals, slave_id);
915                 selection_logic(internals, slave_id);
916
917                 SM_FLAG_CLR(port, BEGIN);
918                 show_warnings(slave_id);
919         }
920
921         rte_eal_alarm_set(internals->mode4.update_timeout_us,
922                         bond_mode_8023ad_periodic_cb, arg);
923 }
924
925 static int
926 bond_mode_8023ad_register_lacp_mac(uint16_t slave_id)
927 {
928         int ret;
929
930         ret = rte_eth_allmulticast_enable(slave_id);
931         if (ret != 0) {
932                 RTE_BOND_LOG(ERR,
933                         "failed to enable allmulti mode for port %u: %s",
934                         slave_id, rte_strerror(-ret));
935         }
936         if (rte_eth_allmulticast_get(slave_id)) {
937                 RTE_BOND_LOG(DEBUG, "forced allmulti for port %u",
938                              slave_id);
939                 bond_mode_8023ad_ports[slave_id].forced_rx_flags =
940                                 BOND_8023AD_FORCED_ALLMULTI;
941                 return 0;
942         }
943
944         ret = rte_eth_promiscuous_enable(slave_id);
945         if (ret != 0) {
946                 RTE_BOND_LOG(ERR,
947                         "failed to enable promiscuous mode for port %u: %s",
948                         slave_id, rte_strerror(-ret));
949         }
950         if (rte_eth_promiscuous_get(slave_id)) {
951                 RTE_BOND_LOG(DEBUG, "forced promiscuous for port %u",
952                              slave_id);
953                 bond_mode_8023ad_ports[slave_id].forced_rx_flags =
954                                 BOND_8023AD_FORCED_PROMISC;
955                 return 0;
956         }
957
958         return -1;
959 }
960
961 static void
962 bond_mode_8023ad_unregister_lacp_mac(uint16_t slave_id)
963 {
964         int ret;
965
966         switch (bond_mode_8023ad_ports[slave_id].forced_rx_flags) {
967         case BOND_8023AD_FORCED_ALLMULTI:
968                 RTE_BOND_LOG(DEBUG, "unset allmulti for port %u", slave_id);
969                 ret = rte_eth_allmulticast_disable(slave_id);
970                 if (ret != 0)
971                         RTE_BOND_LOG(ERR,
972                                 "failed to disable allmulti mode for port %u: %s",
973                                 slave_id, rte_strerror(-ret));
974                 break;
975
976         case BOND_8023AD_FORCED_PROMISC:
977                 RTE_BOND_LOG(DEBUG, "unset promisc for port %u", slave_id);
978                 ret = rte_eth_promiscuous_disable(slave_id);
979                 if (ret != 0)
980                         RTE_BOND_LOG(ERR,
981                                 "failed to disable promiscuous mode for port %u: %s",
982                                 slave_id, rte_strerror(-ret));
983                 break;
984
985         default:
986                 break;
987         }
988 }
989
990 void
991 bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
992                                 uint16_t slave_id)
993 {
994         struct bond_dev_private *internals = bond_dev->data->dev_private;
995
996         struct port *port = &bond_mode_8023ad_ports[slave_id];
997         struct port_params initial = {
998                         .system = { { 0 } },
999                         .system_priority = rte_cpu_to_be_16(0xFFFF),
1000                         .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
1001                         .port_priority = rte_cpu_to_be_16(0x00FF),
1002                         .port_number = 0,
1003         };
1004
1005         char mem_name[RTE_ETH_NAME_MAX_LEN];
1006         int socket_id;
1007         unsigned element_size;
1008         uint32_t total_tx_desc;
1009         struct bond_tx_queue *bd_tx_q;
1010         uint16_t q_id;
1011
1012         /* Given slave mus not be in active list */
1013         RTE_ASSERT(find_slave_by_id(internals->active_slaves,
1014         internals->active_slave_count, slave_id) == internals->active_slave_count);
1015         RTE_SET_USED(internals); /* used only for assert when enabled */
1016
1017         memcpy(&port->actor, &initial, sizeof(struct port_params));
1018         /* Standard requires that port ID must be grater than 0.
1019          * Add 1 do get corresponding port_number */
1020         port->actor.port_number = rte_cpu_to_be_16(slave_id + 1);
1021
1022         memcpy(&port->partner, &initial, sizeof(struct port_params));
1023
1024         /* default states */
1025         port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
1026         port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION;
1027         port->sm_flags = SM_FLAGS_BEGIN;
1028
1029         /* use this port as agregator */
1030         port->aggregator_port_id = slave_id;
1031
1032         if (bond_mode_8023ad_register_lacp_mac(slave_id) < 0) {
1033                 RTE_BOND_LOG(WARNING, "slave %u is most likely broken and won't receive LACP packets",
1034                              slave_id);
1035         }
1036
1037         timer_cancel(&port->warning_timer);
1038
1039         if (port->mbuf_pool != NULL)
1040                 return;
1041
1042         RTE_ASSERT(port->rx_ring == NULL);
1043         RTE_ASSERT(port->tx_ring == NULL);
1044
1045         socket_id = rte_eth_dev_socket_id(slave_id);
1046         if (socket_id == (int)LCORE_ID_ANY)
1047                 socket_id = rte_socket_id();
1048
1049         element_size = sizeof(struct slow_protocol_frame) +
1050                                 RTE_PKTMBUF_HEADROOM;
1051
1052         /* The size of the mempool should be at least:
1053          * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
1054         total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
1055         for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
1056                 bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id];
1057                 total_tx_desc += bd_tx_q->nb_tx_desc;
1058         }
1059
1060         snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
1061         port->mbuf_pool = rte_pktmbuf_pool_create(mem_name, total_tx_desc,
1062                 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
1063                         32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
1064                 0, element_size, socket_id);
1065
1066         /* Any memory allocation failure in initialization is critical because
1067          * resources can't be free, so reinitialization is impossible. */
1068         if (port->mbuf_pool == NULL) {
1069                 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1070                         slave_id, mem_name, rte_strerror(rte_errno));
1071         }
1072
1073         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
1074         port->rx_ring = rte_ring_create(mem_name,
1075                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
1076
1077         if (port->rx_ring == NULL) {
1078                 rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
1079                         mem_name, rte_strerror(rte_errno));
1080         }
1081
1082         /* TX ring is at least one pkt longer to make room for marker packet. */
1083         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
1084         port->tx_ring = rte_ring_create(mem_name,
1085                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
1086
1087         if (port->tx_ring == NULL) {
1088                 rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
1089                         mem_name, rte_strerror(rte_errno));
1090         }
1091 }
1092
1093 int
1094 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
1095                 uint16_t slave_id)
1096 {
1097         void *pkt = NULL;
1098         struct port *port = NULL;
1099         uint8_t old_partner_state;
1100
1101         port = &bond_mode_8023ad_ports[slave_id];
1102
1103         ACTOR_STATE_CLR(port, AGGREGATION);
1104         port->selected = UNSELECTED;
1105
1106         old_partner_state = port->partner_state;
1107         record_default(port);
1108
1109         bond_mode_8023ad_unregister_lacp_mac(slave_id);
1110
1111         /* If partner timeout state changes then disable timer */
1112         if (!((old_partner_state ^ port->partner_state) &
1113                         STATE_LACP_SHORT_TIMEOUT))
1114                 timer_cancel(&port->current_while_timer);
1115
1116         PARTNER_STATE_CLR(port, AGGREGATION);
1117         ACTOR_STATE_CLR(port, EXPIRED);
1118
1119         /* flush rx/tx rings */
1120         while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
1121                 rte_pktmbuf_free((struct rte_mbuf *)pkt);
1122
1123         while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
1124                         rte_pktmbuf_free((struct rte_mbuf *)pkt);
1125         return 0;
1126 }
1127
1128 void
1129 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
1130 {
1131         struct bond_dev_private *internals = bond_dev->data->dev_private;
1132         struct rte_ether_addr slave_addr;
1133         struct port *slave, *agg_slave;
1134         uint16_t slave_id, i, j;
1135
1136         bond_mode_8023ad_stop(bond_dev);
1137
1138         for (i = 0; i < internals->active_slave_count; i++) {
1139                 slave_id = internals->active_slaves[i];
1140                 slave = &bond_mode_8023ad_ports[slave_id];
1141                 rte_eth_macaddr_get(slave_id, &slave_addr);
1142
1143                 if (rte_is_same_ether_addr(&slave_addr, &slave->actor.system))
1144                         continue;
1145
1146                 rte_ether_addr_copy(&slave_addr, &slave->actor.system);
1147                 /* Do nothing if this port is not an aggregator. In other case
1148                  * Set NTT flag on every port that use this aggregator. */
1149                 if (slave->aggregator_port_id != slave_id)
1150                         continue;
1151
1152                 for (j = 0; j < internals->active_slave_count; j++) {
1153                         agg_slave = &bond_mode_8023ad_ports[internals->active_slaves[j]];
1154                         if (agg_slave->aggregator_port_id == slave_id)
1155                                 SM_FLAG_SET(agg_slave, NTT);
1156                 }
1157         }
1158
1159         if (bond_dev->data->dev_started)
1160                 bond_mode_8023ad_start(bond_dev);
1161 }
1162
1163 static void
1164 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1165                 struct rte_eth_bond_8023ad_conf *conf)
1166 {
1167         struct bond_dev_private *internals = dev->data->dev_private;
1168         struct mode8023ad_private *mode4 = &internals->mode4;
1169         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1170
1171         conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1172         conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1173         conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1174         conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1175         conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1176         conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1177         conf->update_timeout_ms = mode4->update_timeout_us / 1000;
1178         conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
1179         conf->slowrx_cb = mode4->slowrx_cb;
1180         conf->agg_selection = mode4->agg_selection;
1181 }
1182
1183 static void
1184 bond_mode_8023ad_conf_get_default(struct rte_eth_bond_8023ad_conf *conf)
1185 {
1186         conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1187         conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1188         conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1189         conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1190         conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1191         conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1192         conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1193         conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
1194         conf->slowrx_cb = NULL;
1195         conf->agg_selection = AGG_STABLE;
1196 }
1197
1198 static void
1199 bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4,
1200                 struct rte_eth_bond_8023ad_conf *conf)
1201 {
1202         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1203
1204         mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1205         mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1206         mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1207         mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1208         mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1209         mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1210         mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1211         mode4->update_timeout_us = conf->update_timeout_ms * 1000;
1212
1213         mode4->dedicated_queues.enabled = 0;
1214         mode4->dedicated_queues.rx_qid = UINT16_MAX;
1215         mode4->dedicated_queues.tx_qid = UINT16_MAX;
1216 }
1217
1218 void
1219 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1220                 struct rte_eth_bond_8023ad_conf *conf)
1221 {
1222         struct rte_eth_bond_8023ad_conf def_conf;
1223         struct bond_dev_private *internals = dev->data->dev_private;
1224         struct mode8023ad_private *mode4 = &internals->mode4;
1225
1226         if (conf == NULL) {
1227                 conf = &def_conf;
1228                 bond_mode_8023ad_conf_get_default(conf);
1229         }
1230
1231         bond_mode_8023ad_stop(dev);
1232         bond_mode_8023ad_conf_assign(mode4, conf);
1233         mode4->slowrx_cb = conf->slowrx_cb;
1234         mode4->agg_selection = AGG_STABLE;
1235
1236         if (dev->data->dev_started)
1237                 bond_mode_8023ad_start(dev);
1238 }
1239
1240 int
1241 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1242 {
1243         struct bond_dev_private *internals = bond_dev->data->dev_private;
1244         uint16_t i;
1245
1246         for (i = 0; i < internals->active_slave_count; i++)
1247                 bond_mode_8023ad_activate_slave(bond_dev,
1248                                 internals->active_slaves[i]);
1249
1250         return 0;
1251 }
1252
1253 int
1254 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1255 {
1256         struct bond_dev_private *internals = bond_dev->data->dev_private;
1257         struct mode8023ad_private *mode4 = &internals->mode4;
1258         static const uint64_t us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
1259
1260         rte_eth_macaddr_get(internals->port_id, &mode4->mac_addr);
1261         if (mode4->slowrx_cb)
1262                 return rte_eal_alarm_set(us, &bond_mode_8023ad_ext_periodic_cb,
1263                                          bond_dev);
1264
1265         return rte_eal_alarm_set(us, &bond_mode_8023ad_periodic_cb, bond_dev);
1266 }
1267
1268 void
1269 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1270 {
1271         struct bond_dev_private *internals = bond_dev->data->dev_private;
1272         struct mode8023ad_private *mode4 = &internals->mode4;
1273
1274         if (mode4->slowrx_cb) {
1275                 rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb,
1276                                      bond_dev);
1277                 return;
1278         }
1279         rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
1280 }
1281
1282 void
1283 bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
1284                                   uint16_t slave_id, struct rte_mbuf *pkt)
1285 {
1286         struct mode8023ad_private *mode4 = &internals->mode4;
1287         struct port *port = &bond_mode_8023ad_ports[slave_id];
1288         struct marker_header *m_hdr;
1289         uint64_t marker_timer, old_marker_timer;
1290         int retval;
1291         uint8_t wrn, subtype;
1292         /* If packet is a marker, we send response now by reusing given packet
1293          * and update only source MAC, destination MAC is multicast so don't
1294          * update it. Other frames will be handled later by state machines */
1295         subtype = rte_pktmbuf_mtod(pkt,
1296                         struct slow_protocol_frame *)->slow_protocol.subtype;
1297
1298         if (subtype == SLOW_SUBTYPE_MARKER) {
1299                 m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
1300
1301                 if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
1302                         wrn = WRN_UNKNOWN_MARKER_TYPE;
1303                         goto free_out;
1304                 }
1305
1306                 /* Setup marker timer. Do it in loop in case concurrent access. */
1307                 do {
1308                         old_marker_timer = port->rx_marker_timer;
1309                         if (!timer_is_expired(&old_marker_timer)) {
1310                                 wrn = WRN_RX_MARKER_TO_FAST;
1311                                 goto free_out;
1312                         }
1313
1314                         timer_set(&marker_timer, mode4->rx_marker_timeout);
1315                         retval = rte_atomic64_cmpset(&port->rx_marker_timer,
1316                                 old_marker_timer, marker_timer);
1317                 } while (unlikely(retval == 0));
1318
1319                 m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
1320                 rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
1321
1322                 if (internals->mode4.dedicated_queues.enabled == 0) {
1323                         int retval = rte_ring_enqueue(port->tx_ring, pkt);
1324                         if (retval != 0) {
1325                                 /* reset timer */
1326                                 port->rx_marker_timer = 0;
1327                                 wrn = WRN_TX_QUEUE_FULL;
1328                                 goto free_out;
1329                         }
1330                 } else {
1331                         /* Send packet directly to the slow queue */
1332                         uint16_t tx_count = rte_eth_tx_burst(slave_id,
1333                                         internals->mode4.dedicated_queues.tx_qid,
1334                                         &pkt, 1);
1335                         if (tx_count != 1) {
1336                                 /* reset timer */
1337                                 port->rx_marker_timer = 0;
1338                                 wrn = WRN_TX_QUEUE_FULL;
1339                                 goto free_out;
1340                         }
1341                 }
1342         } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
1343                 if (internals->mode4.dedicated_queues.enabled == 0) {
1344                         int retval = rte_ring_enqueue(port->rx_ring, pkt);
1345                         if (retval != 0) {
1346                                 /* If RX fing full free lacpdu message and drop packet */
1347                                 wrn = WRN_RX_QUEUE_FULL;
1348                                 goto free_out;
1349                         }
1350                 } else
1351                         rx_machine_update(internals, slave_id, pkt);
1352         } else {
1353                 wrn = WRN_UNKNOWN_SLOW_TYPE;
1354                 goto free_out;
1355         }
1356
1357         return;
1358
1359 free_out:
1360         set_warning_flags(port, wrn);
1361         rte_pktmbuf_free(pkt);
1362 }
1363
1364 int
1365 rte_eth_bond_8023ad_conf_get(uint16_t port_id,
1366                 struct rte_eth_bond_8023ad_conf *conf)
1367 {
1368         struct rte_eth_dev *bond_dev;
1369
1370         if (valid_bonded_port_id(port_id) != 0)
1371                 return -EINVAL;
1372
1373         if (conf == NULL)
1374                 return -EINVAL;
1375
1376         bond_dev = &rte_eth_devices[port_id];
1377         bond_mode_8023ad_conf_get(bond_dev, conf);
1378         return 0;
1379 }
1380
1381 int
1382 rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
1383                 enum rte_bond_8023ad_agg_selection agg_selection)
1384 {
1385         struct rte_eth_dev *bond_dev;
1386         struct bond_dev_private *internals;
1387         struct mode8023ad_private *mode4;
1388
1389         if (valid_bonded_port_id(port_id) != 0)
1390                 return -EINVAL;
1391
1392         bond_dev = &rte_eth_devices[port_id];
1393         internals = bond_dev->data->dev_private;
1394
1395         if (internals->mode != 4)
1396                 return -EINVAL;
1397
1398         mode4 = &internals->mode4;
1399         if (agg_selection == AGG_COUNT || agg_selection == AGG_BANDWIDTH
1400                         || agg_selection == AGG_STABLE)
1401                 mode4->agg_selection = agg_selection;
1402         return 0;
1403 }
1404
1405 int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id)
1406 {
1407         struct rte_eth_dev *bond_dev;
1408         struct bond_dev_private *internals;
1409         struct mode8023ad_private *mode4;
1410
1411         if (valid_bonded_port_id(port_id) != 0)
1412                 return -EINVAL;
1413
1414         bond_dev = &rte_eth_devices[port_id];
1415         internals = bond_dev->data->dev_private;
1416
1417         if (internals->mode != 4)
1418                 return -EINVAL;
1419         mode4 = &internals->mode4;
1420
1421         return mode4->agg_selection;
1422 }
1423
1424
1425
1426 static int
1427 bond_8023ad_setup_validate(uint16_t port_id,
1428                 struct rte_eth_bond_8023ad_conf *conf)
1429 {
1430         if (valid_bonded_port_id(port_id) != 0)
1431                 return -EINVAL;
1432
1433         if (conf != NULL) {
1434                 /* Basic sanity check */
1435                 if (conf->slow_periodic_ms == 0 ||
1436                                 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1437                                 conf->long_timeout_ms == 0 ||
1438                                 conf->short_timeout_ms >= conf->long_timeout_ms ||
1439                                 conf->aggregate_wait_timeout_ms == 0 ||
1440                                 conf->tx_period_ms == 0 ||
1441                                 conf->rx_marker_period_ms == 0 ||
1442                                 conf->update_timeout_ms == 0) {
1443                         RTE_BOND_LOG(ERR, "given mode 4 configuration is invalid");
1444                         return -EINVAL;
1445                 }
1446         }
1447
1448         return 0;
1449 }
1450
1451
1452 int
1453 rte_eth_bond_8023ad_setup(uint16_t port_id,
1454                 struct rte_eth_bond_8023ad_conf *conf)
1455 {
1456         struct rte_eth_dev *bond_dev;
1457         int err;
1458
1459         err = bond_8023ad_setup_validate(port_id, conf);
1460         if (err != 0)
1461                 return err;
1462
1463         bond_dev = &rte_eth_devices[port_id];
1464         bond_mode_8023ad_setup(bond_dev, conf);
1465
1466         return 0;
1467 }
1468
1469
1470
1471
1472
1473 int
1474 rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
1475                 struct rte_eth_bond_8023ad_slave_info *info)
1476 {
1477         struct rte_eth_dev *bond_dev;
1478         struct bond_dev_private *internals;
1479         struct port *port;
1480
1481         if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1482                         rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1483                 return -EINVAL;
1484
1485         bond_dev = &rte_eth_devices[port_id];
1486
1487         internals = bond_dev->data->dev_private;
1488         if (find_slave_by_id(internals->active_slaves,
1489                         internals->active_slave_count, slave_id) ==
1490                                 internals->active_slave_count)
1491                 return -EINVAL;
1492
1493         port = &bond_mode_8023ad_ports[slave_id];
1494         info->selected = port->selected;
1495
1496         info->actor_state = port->actor_state;
1497         rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1498
1499         info->partner_state = port->partner_state;
1500         rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1501
1502         info->agg_port_id = port->aggregator_port_id;
1503         return 0;
1504 }
1505
1506 static int
1507 bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id)
1508 {
1509         struct rte_eth_dev *bond_dev;
1510         struct bond_dev_private *internals;
1511         struct mode8023ad_private *mode4;
1512
1513         if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1514                 return -EINVAL;
1515
1516         bond_dev = &rte_eth_devices[port_id];
1517
1518         if (!bond_dev->data->dev_started)
1519                 return -EINVAL;
1520
1521         internals = bond_dev->data->dev_private;
1522         if (find_slave_by_id(internals->active_slaves,
1523                         internals->active_slave_count, slave_id) ==
1524                                 internals->active_slave_count)
1525                 return -EINVAL;
1526
1527         mode4 = &internals->mode4;
1528         if (mode4->slowrx_cb == NULL)
1529                 return -EINVAL;
1530
1531         return 0;
1532 }
1533
1534 int
1535 rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
1536                                 int enabled)
1537 {
1538         struct port *port;
1539         int res;
1540
1541         res = bond_8023ad_ext_validate(port_id, slave_id);
1542         if (res != 0)
1543                 return res;
1544
1545         port = &bond_mode_8023ad_ports[slave_id];
1546
1547         if (enabled)
1548                 ACTOR_STATE_SET(port, COLLECTING);
1549         else
1550                 ACTOR_STATE_CLR(port, COLLECTING);
1551
1552         return 0;
1553 }
1554
1555 int
1556 rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
1557                                 int enabled)
1558 {
1559         struct port *port;
1560         int res;
1561
1562         res = bond_8023ad_ext_validate(port_id, slave_id);
1563         if (res != 0)
1564                 return res;
1565
1566         port = &bond_mode_8023ad_ports[slave_id];
1567
1568         if (enabled)
1569                 ACTOR_STATE_SET(port, DISTRIBUTING);
1570         else
1571                 ACTOR_STATE_CLR(port, DISTRIBUTING);
1572
1573         return 0;
1574 }
1575
1576 int
1577 rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
1578 {
1579         struct port *port;
1580         int err;
1581
1582         err = bond_8023ad_ext_validate(port_id, slave_id);
1583         if (err != 0)
1584                 return err;
1585
1586         port = &bond_mode_8023ad_ports[slave_id];
1587         return ACTOR_STATE(port, DISTRIBUTING);
1588 }
1589
1590 int
1591 rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
1592 {
1593         struct port *port;
1594         int err;
1595
1596         err = bond_8023ad_ext_validate(port_id, slave_id);
1597         if (err != 0)
1598                 return err;
1599
1600         port = &bond_mode_8023ad_ports[slave_id];
1601         return ACTOR_STATE(port, COLLECTING);
1602 }
1603
1604 int
1605 rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
1606                 struct rte_mbuf *lacp_pkt)
1607 {
1608         struct port *port;
1609         int res;
1610
1611         res = bond_8023ad_ext_validate(port_id, slave_id);
1612         if (res != 0)
1613                 return res;
1614
1615         port = &bond_mode_8023ad_ports[slave_id];
1616
1617         if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
1618                 return -EINVAL;
1619
1620         struct lacpdu_header *lacp;
1621
1622         /* only enqueue LACPDUs */
1623         lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
1624         if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP)
1625                 return -EINVAL;
1626
1627         MODE4_DEBUG("sending LACP frame\n");
1628
1629         return rte_ring_enqueue(port->tx_ring, lacp_pkt);
1630 }
1631
1632 static void
1633 bond_mode_8023ad_ext_periodic_cb(void *arg)
1634 {
1635         struct rte_eth_dev *bond_dev = arg;
1636         struct bond_dev_private *internals = bond_dev->data->dev_private;
1637         struct mode8023ad_private *mode4 = &internals->mode4;
1638         struct port *port;
1639         void *pkt = NULL;
1640         uint16_t i, slave_id;
1641
1642         for (i = 0; i < internals->active_slave_count; i++) {
1643                 slave_id = internals->active_slaves[i];
1644                 port = &bond_mode_8023ad_ports[slave_id];
1645
1646                 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
1647                         struct rte_mbuf *lacp_pkt = pkt;
1648                         struct lacpdu_header *lacp;
1649
1650                         lacp = rte_pktmbuf_mtod(lacp_pkt,
1651                                                 struct lacpdu_header *);
1652                         RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
1653
1654                         /* This is LACP frame so pass it to rx callback.
1655                          * Callback is responsible for freeing mbuf.
1656                          */
1657                         mode4->slowrx_cb(slave_id, lacp_pkt);
1658                 }
1659         }
1660
1661         rte_eal_alarm_set(internals->mode4.update_timeout_us,
1662                         bond_mode_8023ad_ext_periodic_cb, arg);
1663 }
1664
1665 int
1666 rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port)
1667 {
1668         int retval = 0;
1669         struct rte_eth_dev *dev;
1670         struct bond_dev_private *internals;
1671
1672         if (valid_bonded_port_id(port) != 0)
1673                 return -EINVAL;
1674
1675         dev = &rte_eth_devices[port];
1676         internals = dev->data->dev_private;
1677
1678         if (check_for_bonded_ethdev(dev) != 0)
1679                 return -1;
1680
1681         if (bond_8023ad_slow_pkt_hw_filter_supported(port) != 0)
1682                 return -1;
1683
1684         /* Device must be stopped to set up slow queue */
1685         if (dev->data->dev_started)
1686                 return -1;
1687
1688         internals->mode4.dedicated_queues.enabled = 1;
1689
1690         bond_ethdev_mode_set(dev, internals->mode);
1691         return retval;
1692 }
1693
1694 int
1695 rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port)
1696 {
1697         int retval = 0;
1698         struct rte_eth_dev *dev;
1699         struct bond_dev_private *internals;
1700
1701         if (valid_bonded_port_id(port) != 0)
1702                 return -EINVAL;
1703
1704         dev = &rte_eth_devices[port];
1705         internals = dev->data->dev_private;
1706
1707         if (check_for_bonded_ethdev(dev) != 0)
1708                 return -1;
1709
1710         /* Device must be stopped to set up slow queue */
1711         if (dev->data->dev_started)
1712                 return -1;
1713
1714         internals->mode4.dedicated_queues.enabled = 0;
1715
1716         bond_ethdev_mode_set(dev, internals->mode);
1717
1718         return retval;
1719 }