net/bonding: prefer allmulti to promiscuous for LACP
[dpdk.git] / drivers / net / bonding / rte_eth_bond_8023ad.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <stddef.h>
6 #include <string.h>
7 #include <stdbool.h>
8
9 #include <rte_alarm.h>
10 #include <rte_malloc.h>
11 #include <rte_errno.h>
12 #include <rte_cycles.h>
13 #include <rte_compat.h>
14
15 #include "rte_eth_bond_private.h"
16
17 static void bond_mode_8023ad_ext_periodic_cb(void *arg);
18 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
19
20 #define MODE4_DEBUG(fmt, ...)                           \
21         rte_log(RTE_LOG_DEBUG, bond_logtype,            \
22                 "%6u [Port %u: %s] " fmt,               \
23                 bond_dbg_get_time_diff_ms(), slave_id,  \
24                 __func__, ##__VA_ARGS__)
25
26 static uint64_t start_time;
27
/*
 * Return milliseconds elapsed since the first call to this function.
 * Debug-only helper used to timestamp MODE4_DEBUG log lines.
 *
 * The first invocation latches the current TSC into start_time, so it
 * returns 0 on that call.
 */
static unsigned
bond_dbg_get_time_diff_ms(void)
{
	uint64_t now;

	now = rte_rdtsc();
	/* Lazily establish the reference point on first use. */
	if (start_time == 0)
		start_time = now;

	/* Convert the TSC delta to milliseconds. */
	return ((now - start_time) * 1000) / rte_get_tsc_hz();
}
39
/*
 * Dump the contents of a LACPDU to the debug log.
 *
 * Formats the actor and partner system MAC addresses, decodes the
 * actor/partner state bitmasks into human-readable flag names, and
 * emits everything as one multi-line DEBUG log record.
 * Compiled only under RTE_LIBRTE_BOND_DEBUG_8023AD.
 *
 * @param l	LACPDU to print (must be non-NULL).
 */
static void
bond_print_lacp(struct lacpdu *l)
{
	char a_address[18];		/* "XX:XX:XX:XX:XX:XX" + NUL */
	char p_address[18];
	char a_state[256] = { 0 };	/* decoded actor state flags */
	char p_state[256] = { 0 };	/* decoded partner state flags */

	/* Names of the 8 LACP state bits, least-significant bit first. */
	static const char * const state_labels[] = {
		"ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
	};

	int a_len = 0;
	int p_len = 0;
	uint8_t i;
	uint8_t *addr;

	addr = l->actor.port_params.system.addr_bytes;
	snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
		addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);

	addr = l->partner.port_params.system.addr_bytes;
	snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
		addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);

	/* Append the label of every set bit in the two state bytes. */
	for (i = 0; i < 8; i++) {
		if ((l->actor.state >> i) & 1) {
			a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
				state_labels[i]);
		}

		if ((l->partner.state >> i) & 1) {
			p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
				state_labels[i]);
		}
	}

	/* Trim the trailing separator space, if any. */
	if (a_len && a_state[a_len-1] == ' ')
		a_state[a_len-1] = '\0';

	if (p_len && p_state[p_len-1] == ' ')
		p_state[p_len-1] = '\0';

	RTE_BOND_LOG(DEBUG,
		     "LACP: {\n"
		     "  subtype= %02X\n"
		     "  ver_num=%02X\n"
		     "  actor={ tlv=%02X, len=%02X\n"
		     "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
		     "       state={ %s }\n"
		     "  }\n"
		     "  partner={ tlv=%02X, len=%02X\n"
		     "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
		     "       state={ %s }\n"
		     "  }\n"
		     "  collector={info=%02X, length=%02X, max_delay=%04X\n, "
		     "type_term=%02X, terminator_length = %02X }",
		     l->subtype,
		     l->version_number,
		     l->actor.tlv_type_info,
		     l->actor.info_length,
		     l->actor.port_params.system_priority,
		     a_address,
		     l->actor.port_params.key,
		     l->actor.port_params.port_priority,
		     l->actor.port_params.port_number,
		     a_state,
		     l->partner.tlv_type_info,
		     l->partner.info_length,
		     l->partner.port_params.system_priority,
		     p_address,
		     l->partner.port_params.key,
		     l->partner.port_params.port_priority,
		     l->partner.port_params.port_number,
		     p_state,
		     l->tlv_type_collector_info,
		     l->collector_info_length,
		     l->collector_max_delay,
		     l->tlv_type_terminator,
		     l->terminator_length);

}
122
123 #define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
124 #else
125 #define BOND_PRINT_LACP(lacpdu) do { } while (0)
126 #define MODE4_DEBUG(fmt, ...) do { } while (0)
127 #endif
128
/* Destination MAC for all transmitted LACPDUs: the IEEE 802.3 Slow
 * Protocols multicast address 01:80:C2:00:00:02. */
static const struct rte_ether_addr lacp_mac_addr = {
	.addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
};

/* Per-port 802.3ad state machine data, indexed by slave port id. */
struct port bond_mode_8023ad_ports[RTE_MAX_ETHPORTS];
134
/* Stop a timer; a value of zero marks it as not running. */
static void
timer_cancel(uint64_t *timer)
{
	*timer = 0;
}
140
/* Arm a timer to expire 'timeout' TSC cycles from now. */
static void
timer_set(uint64_t *timer, uint64_t timeout)
{
	uint64_t now = rte_rdtsc();

	*timer = now + timeout;
}
146
/* Force a timer into the expired state (deadline == now) without
 * stopping it. */
static void
timer_force_expired(uint64_t *timer)
{
	uint64_t now = rte_rdtsc();

	*timer = now;
}
153
/* A timer is stopped when it holds the sentinel value zero. */
static bool
timer_is_stopped(uint64_t *timer)
{
	if (*timer != 0)
		return false;

	return true;
}
159
/* A timer has expired once the current TSC has moved past its
 * deadline. Note a stopped timer (value 0) also reads as expired;
 * use timer_is_running() to distinguish the two. */
static bool
timer_is_expired(uint64_t *timer)
{
	return rte_rdtsc() > *timer;
}
165
/* Timer is running when it is armed (non-zero) and its deadline has
 * not yet been reached. */
static bool
timer_is_running(uint64_t *timer)
{
	if (timer_is_stopped(timer))
		return false;

	return !timer_is_expired(timer);
}
172
/*
 * Atomically OR 'flags' into port->warnings_to_show.
 *
 * Uses a compare-and-swap retry loop so that no warning bit is lost if
 * the field is updated concurrently; the accumulated bits are consumed
 * and reported later by show_warnings().
 */
static void
set_warning_flags(struct port *port, uint16_t flags)
{
	int retval;
	uint16_t old;
	uint16_t new_flag = 0;

	do {
		old = port->warnings_to_show;
		new_flag = old | flags;
		/* Retry if the field changed between read and CAS. */
		retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
	} while (unlikely(retval == 0));
}
186
187 static void
188 show_warnings(uint16_t slave_id)
189 {
190         struct port *port = &bond_mode_8023ad_ports[slave_id];
191         uint8_t warnings;
192
193         do {
194                 warnings = port->warnings_to_show;
195         } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
196
197         if (!warnings)
198                 return;
199
200         if (!timer_is_expired(&port->warning_timer))
201                 return;
202
203
204         timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
205                         rte_get_tsc_hz() / 1000);
206
207         if (warnings & WRN_RX_QUEUE_FULL) {
208                 RTE_BOND_LOG(DEBUG,
209                              "Slave %u: failed to enqueue LACP packet into RX ring.\n"
210                              "Receive and transmit functions must be invoked on bonded"
211                              "interface at least 10 times per second or LACP will notwork correctly",
212                              slave_id);
213         }
214
215         if (warnings & WRN_TX_QUEUE_FULL) {
216                 RTE_BOND_LOG(DEBUG,
217                              "Slave %u: failed to enqueue LACP packet into TX ring.\n"
218                              "Receive and transmit functions must be invoked on bonded"
219                              "interface at least 10 times per second or LACP will not work correctly",
220                              slave_id);
221         }
222
223         if (warnings & WRN_RX_MARKER_TO_FAST)
224                 RTE_BOND_LOG(INFO, "Slave %u: marker to early - ignoring.",
225                              slave_id);
226
227         if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
228                 RTE_BOND_LOG(INFO,
229                         "Slave %u: ignoring unknown slow protocol frame type",
230                              slave_id);
231         }
232
233         if (warnings & WRN_UNKNOWN_MARKER_TYPE)
234                 RTE_BOND_LOG(INFO, "Slave %u: ignoring unknown marker type",
235                              slave_id);
236
237         if (warnings & WRN_NOT_LACP_CAPABLE)
238                 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
239 }
240
/*
 * Fall back to default partner parameters.
 *
 * Partner administrative parameters are not implemented, so the partner
 * state is reset to an arbitrary default (LACP active) and the actor is
 * marked DEFAULTED to signal that partner information is not current.
 */
static void
record_default(struct port *port)
{
	/* Record default parameters for partner. Partner admin parameters
	 * are not implemented so set them to arbitrary default (last known) and
	 * mark actor that partner is in defaulted state. */
	port->partner_state = STATE_LACP_ACTIVE;
	ACTOR_STATE_SET(port, DEFAULTED);
}
250
/** Function handles rx state machine.
 *
 * This function implements the Receive State Machine from point 5.4.12
 * in the 802.1AX documentation. It should be called periodically.
 *
 * @param internals	Bonding device private data.
 * @param slave_id	Port on which the LACPDU was received.
 * @param lacp		LACPDU received, or NULL when called on a
 *			periodic tick with no frame.
 */
static void
rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
		struct lacpdu *lacp)
{
	struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
	uint64_t timeout;

	/* BEGIN: (re)initialize the state machine. */
	if (SM_FLAG(port, BEGIN)) {
		/* Initialize stuff */
		MODE4_DEBUG("-> INITIALIZE\n");
		SM_FLAG_CLR(port, MOVED);
		port->selected = UNSELECTED;

		record_default(port);

		ACTOR_STATE_CLR(port, EXPIRED);
		timer_cancel(&port->current_while_timer);

		/* DISABLED: On initialization partner is out of sync */
		PARTNER_STATE_CLR(port, SYNCHRONIZATION);

		/* LACP DISABLED stuff if LACP not enabled on this port */
		if (!SM_FLAG(port, LACP_ENABLED))
			PARTNER_STATE_CLR(port, AGGREGATION);
		else
			PARTNER_STATE_SET(port, AGGREGATION);
	}

	if (!SM_FLAG(port, LACP_ENABLED)) {
		/* Update parameters only if state changed (the running
		 * current_while_timer indicates a prior CURRENT state). */
		if (!timer_is_stopped(&port->current_while_timer)) {
			port->selected = UNSELECTED;
			record_default(port);
			PARTNER_STATE_CLR(port, AGGREGATION);
			ACTOR_STATE_CLR(port, EXPIRED);
			timer_cancel(&port->current_while_timer);
		}
		return;
	}

	if (lacp) {
		MODE4_DEBUG("LACP -> CURRENT\n");
		BOND_PRINT_LACP(lacp);
		/* Update selected flag. If partner parameters are defaulted,
		 * assume they match. If not defaulted, compare the LACP actor
		 * with this port's recorded partner params. */
		if (!ACTOR_STATE(port, DEFAULTED) &&
			(ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
			|| memcmp(&port->partner, &lacp->actor.port_params,
				sizeof(port->partner)) != 0)) {
			MODE4_DEBUG("selected <- UNSELECTED\n");
			port->selected = UNSELECTED;
		}

		/* Record this PDU's actor params as our partner params */
		memcpy(&port->partner, &lacp->actor.port_params,
			sizeof(struct port_params));
		port->partner_state = lacp->actor.state;

		/* Partner parameters are not defaulted any more */
		ACTOR_STATE_CLR(port, DEFAULTED);

		/* Check whether the PDU's view of its partner (i.e. us)
		 * matches this port's actual actor parameters. */
		agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
		bool match = port->actor.system_priority ==
			lacp->partner.port_params.system_priority &&
			rte_is_same_ether_addr(&agg->actor.system,
			&lacp->partner.port_params.system) &&
			port->actor.port_priority ==
			lacp->partner.port_params.port_priority &&
			port->actor.port_number ==
			lacp->partner.port_params.port_number;

		/* Set NTT if the partner's information about us is outdated
		 * (XORed and masked state bits differ, or params mismatch). */
		uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
			STATE_SYNCHRONIZATION | STATE_AGGREGATION;

		if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
				match == false) {
			SM_FLAG_SET(port, NTT);
		}

		/* Partner is in sync if its view matches our actor params and
		 * both sides agree on aggregation, or if only the partner is
		 * individual (non-aggregating). */
		if (match == true && ACTOR_STATE(port, AGGREGATION) ==
				PARTNER_STATE(port,	AGGREGATION))
			PARTNER_STATE_SET(port, SYNCHRONIZATION);
		else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
				AGGREGATION))
			PARTNER_STATE_SET(port, SYNCHRONIZATION);
		else
			PARTNER_STATE_CLR(port, SYNCHRONIZATION);

		/* Restart current_while timer with the timeout matching our
		 * own timeout setting. */
		if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
			timeout = internals->mode4.short_timeout;
		else
			timeout = internals->mode4.long_timeout;

		timer_set(&port->current_while_timer, timeout);
		ACTOR_STATE_CLR(port, EXPIRED);
		return; /* No state change */
	}

	/* If CURRENT state timer is not running (stopped or expired)
	 * transit to EXPIRED state from DISABLED or CURRENT */
	if (!timer_is_running(&port->current_while_timer)) {
		ACTOR_STATE_SET(port, EXPIRED);
		PARTNER_STATE_CLR(port, SYNCHRONIZATION);
		PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
		timer_set(&port->current_while_timer, internals->mode4.short_timeout);
	}
}
371
/**
 * Function handles periodic tx state machine.
 *
 * Function implements the Periodic Transmission state machine from point
 * 5.4.13 in the 802.1AX documentation. It should be called periodically.
 *
 * @param internals	Bonding device private data.
 * @param slave_id	Port to handle state machine.
 */
static void
periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
	struct port *port = &bond_mode_8023ad_ports[slave_id];
	/* Periodic transmission happens if either side is LACP active. */
	uint64_t timeout;
	uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
		PARTNER_STATE(port, LACP_ACTIVE);

	uint8_t is_partner_fast, was_partner_fast;
	/* NO_PERIODIC on BEGIN, LACP disabled, or both sides passive. */
	if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
		timer_cancel(&port->periodic_timer);
		timer_force_expired(&port->tx_machine_timer);
		SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);

		MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
			SM_FLAG(port, BEGIN) ? "begind " : "",
			SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
			active ? "LACP active " : "LACP pasive ");
		return;
	}

	is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
	was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);

	/* If periodic timer is not started, transit from NO PERIODIC to
	 * FAST/SLOW. Other case: check if timer expired or the partner's
	 * timeout setting changed. */
	if (!timer_is_stopped(&port->periodic_timer)) {
		if (timer_is_expired(&port->periodic_timer)) {
			SM_FLAG_SET(port, NTT);
		} else if (is_partner_fast != was_partner_fast) {
			/* Partner's timeout was slow and now it is fast -> send
			 * LACP. In the other case (was fast, now slow) just switch
			 * timeout to slow without forcing a LACP send (because the
			 * standard says so). */
			if (is_partner_fast)
				SM_FLAG_SET(port, NTT);
		} else
			return; /* Nothing changed */
	}

	/* Handle state transition to FAST/SLOW LACP timeout */
	if (is_partner_fast) {
		timeout = internals->mode4.fast_periodic_timeout;
		SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
	} else {
		timeout = internals->mode4.slow_periodic_timeout;
		SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
	}

	timer_set(&port->periodic_timer, timeout);
}
433
/**
 * Function handles mux state machine.
 *
 * Function implements the Mux Machine from point 5.4.15 in the 802.1AX
 * documentation. It should be called periodically.
 *
 * @param internals	Bonding device private data.
 * @param slave_id	Port to handle state machine.
 */
static void
mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
	struct port *port = &bond_mode_8023ad_ports[slave_id];

	/* The three actor-state bits the mux machine owns. */
	const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
		STATE_COLLECTING;

	/* Enter DETACHED state on BEGIN condition or from any other state if
	 * port was unselected */
	if (SM_FLAG(port, BEGIN) ||
			port->selected == UNSELECTED || (port->selected == STANDBY &&
				(port->actor_state & state_mask) != 0)) {
		/* detach mux from aggregator */
		port->actor_state &= ~state_mask;
		/* Set NTT on BEGIN, or on a transition from any other state,
		 * which is indicated by wait_while_timer having been started. */
		if (SM_FLAG(port, BEGIN) ||
				!timer_is_stopped(&port->wait_while_timer)) {
			SM_FLAG_SET(port, NTT);
			MODE4_DEBUG("-> DETACHED\n");
		}
		timer_cancel(&port->wait_while_timer);
	}

	if (timer_is_stopped(&port->wait_while_timer)) {
		if (port->selected == SELECTED || port->selected == STANDBY) {
			timer_set(&port->wait_while_timer,
				internals->mode4.aggregate_wait_timeout);

			MODE4_DEBUG("DETACHED -> WAITING\n");
		}
		/* Waiting state entered */
		return;
	}

	/* Transit to next state only once the wait period has elapsed. */
	if (!timer_is_expired(&port->wait_while_timer))
		return;

	if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
		!PARTNER_STATE(port, SYNCHRONIZATION)) {
		/* If in COLLECTING or DISTRIBUTING state and partner becomes
		 * out of sync, transit to ATTACHED state. */
		ACTOR_STATE_CLR(port, DISTRIBUTING);
		ACTOR_STATE_CLR(port, COLLECTING);
		/* Clear actor sync to trigger the ATTACHED transition in the
		 * condition below. */
		ACTOR_STATE_CLR(port, SYNCHRONIZATION);
		MODE4_DEBUG("Out of sync -> ATTACHED\n");
	}

	if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
		/* attach mux to aggregator */
		RTE_ASSERT((port->actor_state & (STATE_COLLECTING |
			STATE_DISTRIBUTING)) == 0);

		ACTOR_STATE_SET(port, SYNCHRONIZATION);
		SM_FLAG_SET(port, NTT);
		MODE4_DEBUG("ATTACHED Entered\n");
	} else if (!ACTOR_STATE(port, COLLECTING)) {
		/* Start collecting if in sync */
		if (PARTNER_STATE(port, SYNCHRONIZATION)) {
			MODE4_DEBUG("ATTACHED -> COLLECTING\n");
			ACTOR_STATE_SET(port, COLLECTING);
			SM_FLAG_SET(port, NTT);
		}
	} else if (ACTOR_STATE(port, COLLECTING)) {
		/* Check if partner is in COLLECTING state. If so this port can
		 * distribute frames to it */
		if (!ACTOR_STATE(port, DISTRIBUTING)) {
			if (PARTNER_STATE(port, COLLECTING)) {
				/* Enable DISTRIBUTING if partner is collecting */
				ACTOR_STATE_SET(port, DISTRIBUTING);
				SM_FLAG_SET(port, NTT);
				MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
				RTE_BOND_LOG(INFO,
					"Bond %u: slave id %u distributing started.",
					internals->port_id, slave_id);
			}
		} else {
			if (!PARTNER_STATE(port, COLLECTING)) {
				/* Disable DISTRIBUTING (enter COLLECTING state) if partner
				 * is not collecting */
				ACTOR_STATE_CLR(port, DISTRIBUTING);
				SM_FLAG_SET(port, NTT);
				MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
				RTE_BOND_LOG(INFO,
					"Bond %u: slave id %u distributing stopped.",
					internals->port_id, slave_id);
			}
		}
	}
}
536
/**
 * Function handles transmit state machine.
 *
 * Function implements the Transmit Machine from point 5.4.16 in the
 * 802.1AX documentation: builds a LACPDU from the port's current
 * actor/partner parameters and enqueues it for transmission (via the
 * internal TX ring, or directly via the dedicated TX queue when
 * dedicated queues are enabled).
 *
 * @param internals	Bonding device private data.
 * @param slave_id	Slave port to run the TX machine for.
 */
static void
tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
	struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];

	struct rte_mbuf *lacp_pkt = NULL;
	struct lacpdu_header *hdr;
	struct lacpdu *lacpdu;

	/* If the periodic timer is not running, the periodic machine is in
	 * NO PERIODIC and according to the 802.3ax standard the tx machine
	 * should not transmit any frames; set NTT to false. */
	if (timer_is_stopped(&port->periodic_timer))
		SM_FLAG_CLR(port, NTT);

	if (!SM_FLAG(port, NTT))
		return;

	/* Rate limit: do not transmit before tx_machine_timer expires. */
	if (!timer_is_expired(&port->tx_machine_timer))
		return;

	lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
	if (lacp_pkt == NULL) {
		RTE_BOND_LOG(ERR, "Failed to allocate LACP packet from pool");
		return;
	}

	lacp_pkt->data_len = sizeof(*hdr);
	lacp_pkt->pkt_len = sizeof(*hdr);

	hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);

	/* Source and destination MAC */
	rte_ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
	rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
	hdr->eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_SLOW);

	lacpdu = &hdr->lacpdu;
	memset(lacpdu, 0, sizeof(*lacpdu));

	/* Initialize LACP part */
	lacpdu->subtype = SLOW_SUBTYPE_LACP;
	lacpdu->version_number = 1;

	/* ACTOR */
	lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
	lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
	memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
			sizeof(port->actor));
	/* System address is taken from the aggregator this port belongs to. */
	agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
	rte_ether_addr_copy(&agg->actor.system,
			&hdr->lacpdu.actor.port_params.system);
	lacpdu->actor.state = port->actor_state;

	/* PARTNER */
	lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
	lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
	memcpy(&lacpdu->partner.port_params, &port->partner,
			sizeof(struct port_params));
	lacpdu->partner.state = port->partner_state;

	/* Other fields */
	lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
	lacpdu->collector_info_length = 0x10;
	lacpdu->collector_max_delay = 0;

	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
	lacpdu->terminator_length = 0;

	MODE4_DEBUG("Sending LACP frame\n");
	BOND_PRINT_LACP(lacpdu);

	if (internals->mode4.dedicated_queues.enabled == 0) {
		int retval = rte_ring_enqueue(port->tx_ring, lacp_pkt);
		if (retval != 0) {
			/* If TX ring full, drop packet and free message.
			   Retransmission will happen in next function call. */
			rte_pktmbuf_free(lacp_pkt);
			set_warning_flags(port, WRN_TX_QUEUE_FULL);
			return;
		}
	} else {
		uint16_t pkts_sent = rte_eth_tx_burst(slave_id,
				internals->mode4.dedicated_queues.tx_qid,
				&lacp_pkt, 1);
		if (pkts_sent != 1) {
			rte_pktmbuf_free(lacp_pkt);
			set_warning_flags(port, WRN_TX_QUEUE_FULL);
			return;
		}
	}


	/* Sent successfully: restart the TX rate-limit timer and clear NTT. */
	timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
	SM_FLAG_CLR(port, NTT);
}
641
/*
 * Return the index of the largest element in the first n entries of a.
 * Ties resolve to the lowest index. Returns (uint8_t)-1 when n <= 0.
 */
static uint8_t
max_index(uint64_t *a, int n)
{
	int idx, best = 0;

	if (n <= 0)
		return -1;

	for (idx = 1; idx < n; idx++)
		if (a[idx] > a[best])
			best = idx;

	return best;
}
660
661 /**
662  * Function assigns port to aggregator.
663  *
664  * @param bond_dev_private      Pointer to bond_dev_private structure.
665  * @param port_pos                      Port to assign.
666  */
667 static void
668 selection_logic(struct bond_dev_private *internals, uint16_t slave_id)
669 {
670         struct port *agg, *port;
671         uint16_t slaves_count, new_agg_id, i, j = 0;
672         uint16_t *slaves;
673         uint64_t agg_bandwidth[8] = {0};
674         uint64_t agg_count[8] = {0};
675         uint16_t default_slave = 0;
676         uint8_t mode_count_id, mode_band_id;
677         struct rte_eth_link link_info;
678
679         slaves = internals->active_slaves;
680         slaves_count = internals->active_slave_count;
681         port = &bond_mode_8023ad_ports[slave_id];
682
683         /* Search for aggregator suitable for this port */
684         for (i = 0; i < slaves_count; ++i) {
685                 agg = &bond_mode_8023ad_ports[slaves[i]];
686                 /* Skip ports that are not aggreagators */
687                 if (agg->aggregator_port_id != slaves[i])
688                         continue;
689
690                 agg_count[agg->aggregator_port_id] += 1;
691                 rte_eth_link_get_nowait(slaves[i], &link_info);
692                 agg_bandwidth[agg->aggregator_port_id] += link_info.link_speed;
693
694                 /* Actors system ID is not checked since all slave device have the same
695                  * ID (MAC address). */
696                 if ((agg->actor.key == port->actor.key &&
697                         agg->partner.system_priority == port->partner.system_priority &&
698                         rte_is_same_ether_addr(&agg->partner.system,
699                                         &port->partner.system) == 1
700                         && (agg->partner.key == port->partner.key)) &&
701                         rte_is_zero_ether_addr(&port->partner.system) != 1 &&
702                         (agg->actor.key &
703                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
704
705                         if (j == 0)
706                                 default_slave = i;
707                         j++;
708                 }
709         }
710
711         switch (internals->mode4.agg_selection) {
712         case AGG_COUNT:
713                 mode_count_id = max_index(
714                                 (uint64_t *)agg_count, slaves_count);
715                 new_agg_id = mode_count_id;
716                 break;
717         case AGG_BANDWIDTH:
718                 mode_band_id = max_index(
719                                 (uint64_t *)agg_bandwidth, slaves_count);
720                 new_agg_id = mode_band_id;
721                 break;
722         case AGG_STABLE:
723                 if (default_slave == slaves_count)
724                         new_agg_id = slave_id;
725                 else
726                         new_agg_id = slaves[default_slave];
727                 break;
728         default:
729                 if (default_slave == slaves_count)
730                         new_agg_id = slave_id;
731                 else
732                         new_agg_id = slaves[default_slave];
733                 break;
734         }
735
736         if (new_agg_id != port->aggregator_port_id) {
737                 port->aggregator_port_id = new_agg_id;
738
739                 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
740                         "\t%s aggregator ID=%3u\n",
741                         port->aggregator_port_id,
742                         port->aggregator_port_id == slave_id ?
743                                 "aggregator not found, using default" : "aggregator found",
744                         port->aggregator_port_id);
745         }
746
747         port->selected = SELECTED;
748 }
749
750 /* Function maps DPDK speed to bonding speed stored in key field */
751 static uint16_t
752 link_speed_key(uint16_t speed) {
753         uint16_t key_speed;
754
755         switch (speed) {
756         case ETH_SPEED_NUM_NONE:
757                 key_speed = 0x00;
758                 break;
759         case ETH_SPEED_NUM_10M:
760                 key_speed = BOND_LINK_SPEED_KEY_10M;
761                 break;
762         case ETH_SPEED_NUM_100M:
763                 key_speed = BOND_LINK_SPEED_KEY_100M;
764                 break;
765         case ETH_SPEED_NUM_1G:
766                 key_speed = BOND_LINK_SPEED_KEY_1000M;
767                 break;
768         case ETH_SPEED_NUM_10G:
769                 key_speed = BOND_LINK_SPEED_KEY_10G;
770                 break;
771         case ETH_SPEED_NUM_20G:
772                 key_speed = BOND_LINK_SPEED_KEY_20G;
773                 break;
774         case ETH_SPEED_NUM_40G:
775                 key_speed = BOND_LINK_SPEED_KEY_40G;
776                 break;
777         default:
778                 /* Unknown speed*/
779                 key_speed = 0xFFFF;
780         }
781
782         return key_speed;
783 }
784
785 static void
786 rx_machine_update(struct bond_dev_private *internals, uint16_t slave_id,
787                 struct rte_mbuf *lacp_pkt) {
788         struct lacpdu_header *lacp;
789         struct lacpdu_actor_partner_params *partner;
790
791         if (lacp_pkt != NULL) {
792                 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
793                 RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
794
795                 partner = &lacp->lacpdu.partner;
796                 if (rte_is_same_ether_addr(&partner->port_params.system,
797                         &internals->mode4.mac_addr)) {
798                         /* This LACP frame is sending to the bonding port
799                          * so pass it to rx_machine.
800                          */
801                         rx_machine(internals, slave_id, &lacp->lacpdu);
802                 }
803                 rte_pktmbuf_free(lacp_pkt);
804         } else
805                 rx_machine(internals, slave_id, NULL);
806 }
807
/*
 * Mode 4 (802.3ad) periodic state machine callback; re-armed at the end
 * of every invocation with mode4.update_timeout_us.
 *
 * Pass 1 over the active slaves refreshes each port's actor key
 * (derived from link speed/duplex) and actor system address, setting
 * NTT when either changed.  Pass 2 runs the LACP machinery (rx,
 * periodic, mux, tx, selection) on every full-duplex port; half-duplex
 * or link-down ports have LACP disabled and are skipped.
 */
static void
bond_mode_8023ad_periodic_cb(void *arg)
{
	struct rte_eth_dev *bond_dev = arg;
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct port *port;
	struct rte_eth_link link_info;
	struct rte_ether_addr slave_addr;
	struct rte_mbuf *lacp_pkt = NULL;
	uint16_t slave_id;
	uint16_t i;


	/* Update link status on each port */
	for (i = 0; i < internals->active_slave_count; i++) {
		uint16_t key;

		slave_id = internals->active_slaves[i];
		rte_eth_link_get_nowait(slave_id, &link_info);
		rte_eth_macaddr_get(slave_id, &slave_addr);

		if (link_info.link_status != 0) {
			/* Key encodes speed (shifted) plus the duplex bit. */
			key = link_speed_key(link_info.link_speed) << 1;
			if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
				key |= BOND_LINK_FULL_DUPLEX_KEY;
		} else
			key = 0;

		port = &bond_mode_8023ad_ports[slave_id];

		/* actor.key is stored in network byte order. */
		key = rte_cpu_to_be_16(key);
		if (key != port->actor.key) {
			/* Missing full-duplex bit means the link cannot run
			 * LACP; record a warning for show_warnings(). */
			if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
				set_warning_flags(port, WRN_NOT_LACP_CAPABLE);

			port->actor.key = key;
			/* Need To Transmit: advertise the new key. */
			SM_FLAG_SET(port, NTT);
		}

		if (!rte_is_same_ether_addr(&port->actor.system, &slave_addr)) {
			rte_ether_addr_copy(&slave_addr, &port->actor.system);
			if (port->aggregator_port_id == slave_id)
				SM_FLAG_SET(port, NTT);
		}
	}

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		port = &bond_mode_8023ad_ports[slave_id];

		if ((port->actor.key &
				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {

			SM_FLAG_SET(port, BEGIN);

			/* LACP is disabled on half duplex or link is down */
			if (SM_FLAG(port, LACP_ENABLED)) {
				/* If port was enabled set it to BEGIN state */
				SM_FLAG_CLR(port, LACP_ENABLED);
				ACTOR_STATE_CLR(port, DISTRIBUTING);
				ACTOR_STATE_CLR(port, COLLECTING);
			}

			/* Skip this port processing */
			continue;
		}

		SM_FLAG_SET(port, LACP_ENABLED);

		if (internals->mode4.dedicated_queues.enabled == 0) {
			/* Find LACP packet to this port. Do not check subtype,
			 * it is done in function that queued packet
			 */
			int retval = rte_ring_dequeue(port->rx_ring,
					(void **)&lacp_pkt);

			if (retval != 0)
				lacp_pkt = NULL;

			rx_machine_update(internals, slave_id, lacp_pkt);
		} else {
			/* Dedicated queue mode: poll the slow-protocol RX
			 * queue directly instead of the software ring. */
			uint16_t rx_count = rte_eth_rx_burst(slave_id,
					internals->mode4.dedicated_queues.rx_qid,
					&lacp_pkt, 1);

			if (rx_count == 1)
				bond_mode_8023ad_handle_slow_pkt(internals,
						slave_id, lacp_pkt);
			else
				rx_machine_update(internals, slave_id, NULL);
		}

		periodic_machine(internals, slave_id);
		mux_machine(internals, slave_id);
		tx_machine(internals, slave_id);
		selection_logic(internals, slave_id);

		SM_FLAG_CLR(port, BEGIN);
		show_warnings(slave_id);
	}

	/* Re-arm this callback for the next update period. */
	rte_eal_alarm_set(internals->mode4.update_timeout_us,
			bond_mode_8023ad_periodic_cb, arg);
}
912
913 static int
914 bond_mode_8023ad_register_lacp_mac(uint16_t slave_id)
915 {
916         rte_eth_allmulticast_enable(slave_id);
917         if (rte_eth_allmulticast_get(slave_id)) {
918                 RTE_BOND_LOG(DEBUG, "forced allmulti for port %u",
919                              slave_id);
920                 bond_mode_8023ad_ports[slave_id].forced_rx_flags =
921                                 BOND_8023AD_FORCED_ALLMULTI;
922                 return 0;
923         }
924
925         rte_eth_promiscuous_enable(slave_id);
926         if (rte_eth_promiscuous_get(slave_id)) {
927                 RTE_BOND_LOG(DEBUG, "forced promiscuous for port %u",
928                              slave_id);
929                 bond_mode_8023ad_ports[slave_id].forced_rx_flags =
930                                 BOND_8023AD_FORCED_PROMISC;
931                 return 0;
932         }
933
934         return -1;
935 }
936
937 static void
938 bond_mode_8023ad_unregister_lacp_mac(uint16_t slave_id)
939 {
940         switch (bond_mode_8023ad_ports[slave_id].forced_rx_flags) {
941         case BOND_8023AD_FORCED_ALLMULTI:
942                 RTE_BOND_LOG(DEBUG, "unset allmulti for port %u", slave_id);
943                 rte_eth_allmulticast_disable(slave_id);
944                 break;
945
946         case BOND_8023AD_FORCED_PROMISC:
947                 RTE_BOND_LOG(DEBUG, "unset promisc for port %u", slave_id);
948                 rte_eth_promiscuous_disable(slave_id);
949                 break;
950
951         default:
952                 break;
953         }
954 }
955
/*
 * Initialize 802.3ad state for a slave being added to the active list:
 * actor/partner parameters, default state-machine flags, RX filtering
 * for LACP frames and, on first activation only, the per-slave mbuf
 * pool and rx/tx rings (these persist across deactivation).
 */
void
bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
				uint16_t slave_id)
{
	struct bond_dev_private *internals = bond_dev->data->dev_private;

	struct port *port = &bond_mode_8023ad_ports[slave_id];
	struct port_params initial = {
			.system = { { 0 } },
			.system_priority = rte_cpu_to_be_16(0xFFFF),
			.key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
			.port_priority = rte_cpu_to_be_16(0x00FF),
			.port_number = 0,
	};

	char mem_name[RTE_ETH_NAME_MAX_LEN];
	int socket_id;
	unsigned element_size;
	uint32_t total_tx_desc;
	struct bond_tx_queue *bd_tx_q;
	uint16_t q_id;

	/* Given slave must not be in active list. */
	RTE_ASSERT(find_slave_by_id(internals->active_slaves,
	internals->active_slave_count, slave_id) == internals->active_slave_count);
	RTE_SET_USED(internals); /* used only for assert when enabled */

	memcpy(&port->actor, &initial, sizeof(struct port_params));
	/* Standard requires that port ID must be greater than 0.
	 * Add 1 to get corresponding port_number */
	port->actor.port_number = rte_cpu_to_be_16(slave_id + 1);

	memcpy(&port->partner, &initial, sizeof(struct port_params));

	/* default states */
	port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
	port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION;
	port->sm_flags = SM_FLAGS_BEGIN;

	/* use this port as aggregator */
	port->aggregator_port_id = slave_id;

	/* Enable allmulti/promiscuous RX so LACP frames are received. */
	if (bond_mode_8023ad_register_lacp_mac(slave_id) < 0) {
		RTE_BOND_LOG(WARNING, "slave %u is most likely broken and won't receive LACP packets",
			     slave_id);
	}

	timer_cancel(&port->warning_timer);

	/* Pool and rings are created once and reused on re-activation. */
	if (port->mbuf_pool != NULL)
		return;

	RTE_ASSERT(port->rx_ring == NULL);
	RTE_ASSERT(port->tx_ring == NULL);

	socket_id = rte_eth_dev_socket_id(slave_id);
	if (socket_id == (int)LCORE_ID_ANY)
		socket_id = rte_socket_id();

	element_size = sizeof(struct slow_protocol_frame) +
				RTE_PKTMBUF_HEADROOM;

	/* The size of the mempool should be at least:
	 * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
	total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
	for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id];
		total_tx_desc += bd_tx_q->nb_tx_desc;
	}

	snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
	port->mbuf_pool = rte_pktmbuf_pool_create(mem_name, total_tx_desc,
		RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
			32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
		0, element_size, socket_id);

	/* Any memory allocation failure in initialization is critical because
	 * resources can't be free, so reinitialization is impossible. */
	if (port->mbuf_pool == NULL) {
		rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
			slave_id, mem_name, rte_strerror(rte_errno));
	}

	snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
	port->rx_ring = rte_ring_create(mem_name,
			rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);

	if (port->rx_ring == NULL) {
		rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
			mem_name, rte_strerror(rte_errno));
	}

	/* TX ring is at least one pkt longer to make room for marker packet. */
	snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
	port->tx_ring = rte_ring_create(mem_name,
			rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);

	if (port->tx_ring == NULL) {
		rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
			mem_name, rte_strerror(rte_errno));
	}
}
1058
1059 int
1060 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
1061                 uint16_t slave_id)
1062 {
1063         void *pkt = NULL;
1064         struct port *port = NULL;
1065         uint8_t old_partner_state;
1066
1067         port = &bond_mode_8023ad_ports[slave_id];
1068
1069         ACTOR_STATE_CLR(port, AGGREGATION);
1070         port->selected = UNSELECTED;
1071
1072         old_partner_state = port->partner_state;
1073         record_default(port);
1074
1075         bond_mode_8023ad_unregister_lacp_mac(slave_id);
1076
1077         /* If partner timeout state changes then disable timer */
1078         if (!((old_partner_state ^ port->partner_state) &
1079                         STATE_LACP_SHORT_TIMEOUT))
1080                 timer_cancel(&port->current_while_timer);
1081
1082         PARTNER_STATE_CLR(port, AGGREGATION);
1083         ACTOR_STATE_CLR(port, EXPIRED);
1084
1085         /* flush rx/tx rings */
1086         while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
1087                 rte_pktmbuf_free((struct rte_mbuf *)pkt);
1088
1089         while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
1090                         rte_pktmbuf_free((struct rte_mbuf *)pkt);
1091         return 0;
1092 }
1093
1094 void
1095 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
1096 {
1097         struct bond_dev_private *internals = bond_dev->data->dev_private;
1098         struct rte_ether_addr slave_addr;
1099         struct port *slave, *agg_slave;
1100         uint16_t slave_id, i, j;
1101
1102         bond_mode_8023ad_stop(bond_dev);
1103
1104         for (i = 0; i < internals->active_slave_count; i++) {
1105                 slave_id = internals->active_slaves[i];
1106                 slave = &bond_mode_8023ad_ports[slave_id];
1107                 rte_eth_macaddr_get(slave_id, &slave_addr);
1108
1109                 if (rte_is_same_ether_addr(&slave_addr, &slave->actor.system))
1110                         continue;
1111
1112                 rte_ether_addr_copy(&slave_addr, &slave->actor.system);
1113                 /* Do nothing if this port is not an aggregator. In other case
1114                  * Set NTT flag on every port that use this aggregator. */
1115                 if (slave->aggregator_port_id != slave_id)
1116                         continue;
1117
1118                 for (j = 0; j < internals->active_slave_count; j++) {
1119                         agg_slave = &bond_mode_8023ad_ports[internals->active_slaves[j]];
1120                         if (agg_slave->aggregator_port_id == slave_id)
1121                                 SM_FLAG_SET(agg_slave, NTT);
1122                 }
1123         }
1124
1125         if (bond_dev->data->dev_started)
1126                 bond_mode_8023ad_start(bond_dev);
1127 }
1128
1129 static void
1130 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1131                 struct rte_eth_bond_8023ad_conf *conf)
1132 {
1133         struct bond_dev_private *internals = dev->data->dev_private;
1134         struct mode8023ad_private *mode4 = &internals->mode4;
1135         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1136
1137         conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1138         conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1139         conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1140         conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1141         conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1142         conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1143         conf->update_timeout_ms = mode4->update_timeout_us / 1000;
1144         conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
1145         conf->slowrx_cb = mode4->slowrx_cb;
1146         conf->agg_selection = mode4->agg_selection;
1147 }
1148
1149 static void
1150 bond_mode_8023ad_conf_get_default(struct rte_eth_bond_8023ad_conf *conf)
1151 {
1152         conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1153         conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1154         conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1155         conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1156         conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1157         conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1158         conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1159         conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
1160         conf->slowrx_cb = NULL;
1161         conf->agg_selection = AGG_STABLE;
1162 }
1163
1164 static void
1165 bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4,
1166                 struct rte_eth_bond_8023ad_conf *conf)
1167 {
1168         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1169
1170         mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1171         mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1172         mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1173         mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1174         mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1175         mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1176         mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1177         mode4->update_timeout_us = conf->update_timeout_ms * 1000;
1178
1179         mode4->dedicated_queues.enabled = 0;
1180         mode4->dedicated_queues.rx_qid = UINT16_MAX;
1181         mode4->dedicated_queues.tx_qid = UINT16_MAX;
1182 }
1183
1184 void
1185 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1186                 struct rte_eth_bond_8023ad_conf *conf)
1187 {
1188         struct rte_eth_bond_8023ad_conf def_conf;
1189         struct bond_dev_private *internals = dev->data->dev_private;
1190         struct mode8023ad_private *mode4 = &internals->mode4;
1191
1192         if (conf == NULL) {
1193                 conf = &def_conf;
1194                 bond_mode_8023ad_conf_get_default(conf);
1195         }
1196
1197         bond_mode_8023ad_stop(dev);
1198         bond_mode_8023ad_conf_assign(mode4, conf);
1199         mode4->slowrx_cb = conf->slowrx_cb;
1200         mode4->agg_selection = AGG_STABLE;
1201
1202         if (dev->data->dev_started)
1203                 bond_mode_8023ad_start(dev);
1204 }
1205
1206 int
1207 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1208 {
1209         struct bond_dev_private *internals = bond_dev->data->dev_private;
1210         uint16_t i;
1211
1212         for (i = 0; i < internals->active_slave_count; i++)
1213                 bond_mode_8023ad_activate_slave(bond_dev,
1214                                 internals->active_slaves[i]);
1215
1216         return 0;
1217 }
1218
1219 int
1220 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1221 {
1222         struct bond_dev_private *internals = bond_dev->data->dev_private;
1223         struct mode8023ad_private *mode4 = &internals->mode4;
1224         static const uint64_t us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
1225
1226         rte_eth_macaddr_get(internals->port_id, &mode4->mac_addr);
1227         if (mode4->slowrx_cb)
1228                 return rte_eal_alarm_set(us, &bond_mode_8023ad_ext_periodic_cb,
1229                                          bond_dev);
1230
1231         return rte_eal_alarm_set(us, &bond_mode_8023ad_periodic_cb, bond_dev);
1232 }
1233
1234 void
1235 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1236 {
1237         struct bond_dev_private *internals = bond_dev->data->dev_private;
1238         struct mode8023ad_private *mode4 = &internals->mode4;
1239
1240         if (mode4->slowrx_cb) {
1241                 rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb,
1242                                      bond_dev);
1243                 return;
1244         }
1245         rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
1246 }
1247
/*
 * Process one received slow-protocol frame (marker or LACP) for
 * slave_id.  Marker-info frames are answered immediately by reusing the
 * given mbuf (only the source MAC is rewritten); LACPDUs are queued to
 * the port's rx_ring, or fed straight to the RX machine when dedicated
 * queues are enabled.  Takes ownership of pkt: it is transmitted,
 * queued, or freed here.
 */
void
bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
				  uint16_t slave_id, struct rte_mbuf *pkt)
{
	struct mode8023ad_private *mode4 = &internals->mode4;
	struct port *port = &bond_mode_8023ad_ports[slave_id];
	struct marker_header *m_hdr;
	uint64_t marker_timer, old_marker_timer;
	int retval;
	uint8_t wrn, subtype;
	/* If packet is a marker, we send response now by reusing given packet
	 * and update only source MAC, destination MAC is multicast so don't
	 * update it. Other frames will be handled later by state machines */
	subtype = rte_pktmbuf_mtod(pkt,
			struct slow_protocol_frame *)->slow_protocol.subtype;

	if (subtype == SLOW_SUBTYPE_MARKER) {
		m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);

		if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
			wrn = WRN_UNKNOWN_MARKER_TYPE;
			goto free_out;
		}

		/* Setup marker timer. Do it in loop in case concurrent access. */
		do {
			old_marker_timer = port->rx_marker_timer;
			/* Rate-limit responses: drop the marker if the
			 * previous marker window has not expired yet. */
			if (!timer_is_expired(&old_marker_timer)) {
				wrn = WRN_RX_MARKER_TO_FAST;
				goto free_out;
			}

			timer_set(&marker_timer, mode4->rx_marker_timeout);
			retval = rte_atomic64_cmpset(&port->rx_marker_timer,
				old_marker_timer, marker_timer);
		} while (unlikely(retval == 0));

		/* Turn the request into a response in place. */
		m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
		rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);

		if (internals->mode4.dedicated_queues.enabled == 0) {
			/* Response goes out via the tx_machine's ring. */
			int retval = rte_ring_enqueue(port->tx_ring, pkt);
			if (retval != 0) {
				/* reset timer */
				port->rx_marker_timer = 0;
				wrn = WRN_TX_QUEUE_FULL;
				goto free_out;
			}
		} else {
			/* Send packet directly to the slow queue */
			uint16_t tx_count = rte_eth_tx_burst(slave_id,
					internals->mode4.dedicated_queues.tx_qid,
					&pkt, 1);
			if (tx_count != 1) {
				/* reset timer */
				port->rx_marker_timer = 0;
				wrn = WRN_TX_QUEUE_FULL;
				goto free_out;
			}
		}
	} else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
		if (internals->mode4.dedicated_queues.enabled == 0) {
			int retval = rte_ring_enqueue(port->rx_ring, pkt);
			if (retval != 0) {
				/* If RX ring is full, free the LACPDU and drop the packet */
				wrn = WRN_RX_QUEUE_FULL;
				goto free_out;
			}
		} else
			rx_machine_update(internals, slave_id, pkt);
	} else {
		wrn = WRN_UNKNOWN_SLOW_TYPE;
		goto free_out;
	}

	return;

free_out:
	set_warning_flags(port, wrn);
	rte_pktmbuf_free(pkt);
}
1329
1330 int
1331 rte_eth_bond_8023ad_conf_get(uint16_t port_id,
1332                 struct rte_eth_bond_8023ad_conf *conf)
1333 {
1334         struct rte_eth_dev *bond_dev;
1335
1336         if (valid_bonded_port_id(port_id) != 0)
1337                 return -EINVAL;
1338
1339         if (conf == NULL)
1340                 return -EINVAL;
1341
1342         bond_dev = &rte_eth_devices[port_id];
1343         bond_mode_8023ad_conf_get(bond_dev, conf);
1344         return 0;
1345 }
1346
1347 int
1348 rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
1349                 enum rte_bond_8023ad_agg_selection agg_selection)
1350 {
1351         struct rte_eth_dev *bond_dev;
1352         struct bond_dev_private *internals;
1353         struct mode8023ad_private *mode4;
1354
1355         bond_dev = &rte_eth_devices[port_id];
1356         internals = bond_dev->data->dev_private;
1357
1358         if (valid_bonded_port_id(port_id) != 0)
1359                 return -EINVAL;
1360         if (internals->mode != 4)
1361                 return -EINVAL;
1362
1363         mode4 = &internals->mode4;
1364         if (agg_selection == AGG_COUNT || agg_selection == AGG_BANDWIDTH
1365                         || agg_selection == AGG_STABLE)
1366                 mode4->agg_selection = agg_selection;
1367         return 0;
1368 }
1369
1370 int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id)
1371 {
1372         struct rte_eth_dev *bond_dev;
1373         struct bond_dev_private *internals;
1374         struct mode8023ad_private *mode4;
1375
1376         bond_dev = &rte_eth_devices[port_id];
1377         internals = bond_dev->data->dev_private;
1378
1379         if (valid_bonded_port_id(port_id) != 0)
1380                 return -EINVAL;
1381         if (internals->mode != 4)
1382                 return -EINVAL;
1383         mode4 = &internals->mode4;
1384
1385         return mode4->agg_selection;
1386 }
1387
1388
1389
1390 static int
1391 bond_8023ad_setup_validate(uint16_t port_id,
1392                 struct rte_eth_bond_8023ad_conf *conf)
1393 {
1394         if (valid_bonded_port_id(port_id) != 0)
1395                 return -EINVAL;
1396
1397         if (conf != NULL) {
1398                 /* Basic sanity check */
1399                 if (conf->slow_periodic_ms == 0 ||
1400                                 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1401                                 conf->long_timeout_ms == 0 ||
1402                                 conf->short_timeout_ms >= conf->long_timeout_ms ||
1403                                 conf->aggregate_wait_timeout_ms == 0 ||
1404                                 conf->tx_period_ms == 0 ||
1405                                 conf->rx_marker_period_ms == 0 ||
1406                                 conf->update_timeout_ms == 0) {
1407                         RTE_BOND_LOG(ERR, "given mode 4 configuration is invalid");
1408                         return -EINVAL;
1409                 }
1410         }
1411
1412         return 0;
1413 }
1414
1415
1416 int
1417 rte_eth_bond_8023ad_setup(uint16_t port_id,
1418                 struct rte_eth_bond_8023ad_conf *conf)
1419 {
1420         struct rte_eth_dev *bond_dev;
1421         int err;
1422
1423         err = bond_8023ad_setup_validate(port_id, conf);
1424         if (err != 0)
1425                 return err;
1426
1427         bond_dev = &rte_eth_devices[port_id];
1428         bond_mode_8023ad_setup(bond_dev, conf);
1429
1430         return 0;
1431 }
1432
1433
1434
1435
1436
1437 int
1438 rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
1439                 struct rte_eth_bond_8023ad_slave_info *info)
1440 {
1441         struct rte_eth_dev *bond_dev;
1442         struct bond_dev_private *internals;
1443         struct port *port;
1444
1445         if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1446                         rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1447                 return -EINVAL;
1448
1449         bond_dev = &rte_eth_devices[port_id];
1450
1451         internals = bond_dev->data->dev_private;
1452         if (find_slave_by_id(internals->active_slaves,
1453                         internals->active_slave_count, slave_id) ==
1454                                 internals->active_slave_count)
1455                 return -EINVAL;
1456
1457         port = &bond_mode_8023ad_ports[slave_id];
1458         info->selected = port->selected;
1459
1460         info->actor_state = port->actor_state;
1461         rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1462
1463         info->partner_state = port->partner_state;
1464         rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1465
1466         info->agg_port_id = port->aggregator_port_id;
1467         return 0;
1468 }
1469
1470 static int
1471 bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id)
1472 {
1473         struct rte_eth_dev *bond_dev;
1474         struct bond_dev_private *internals;
1475         struct mode8023ad_private *mode4;
1476
1477         if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1478                 return -EINVAL;
1479
1480         bond_dev = &rte_eth_devices[port_id];
1481
1482         if (!bond_dev->data->dev_started)
1483                 return -EINVAL;
1484
1485         internals = bond_dev->data->dev_private;
1486         if (find_slave_by_id(internals->active_slaves,
1487                         internals->active_slave_count, slave_id) ==
1488                                 internals->active_slave_count)
1489                 return -EINVAL;
1490
1491         mode4 = &internals->mode4;
1492         if (mode4->slowrx_cb == NULL)
1493                 return -EINVAL;
1494
1495         return 0;
1496 }
1497
1498 int
1499 rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
1500                                 int enabled)
1501 {
1502         struct port *port;
1503         int res;
1504
1505         res = bond_8023ad_ext_validate(port_id, slave_id);
1506         if (res != 0)
1507                 return res;
1508
1509         port = &bond_mode_8023ad_ports[slave_id];
1510
1511         if (enabled)
1512                 ACTOR_STATE_SET(port, COLLECTING);
1513         else
1514                 ACTOR_STATE_CLR(port, COLLECTING);
1515
1516         return 0;
1517 }
1518
1519 int
1520 rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
1521                                 int enabled)
1522 {
1523         struct port *port;
1524         int res;
1525
1526         res = bond_8023ad_ext_validate(port_id, slave_id);
1527         if (res != 0)
1528                 return res;
1529
1530         port = &bond_mode_8023ad_ports[slave_id];
1531
1532         if (enabled)
1533                 ACTOR_STATE_SET(port, DISTRIBUTING);
1534         else
1535                 ACTOR_STATE_CLR(port, DISTRIBUTING);
1536
1537         return 0;
1538 }
1539
1540 int
1541 rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
1542 {
1543         struct port *port;
1544         int err;
1545
1546         err = bond_8023ad_ext_validate(port_id, slave_id);
1547         if (err != 0)
1548                 return err;
1549
1550         port = &bond_mode_8023ad_ports[slave_id];
1551         return ACTOR_STATE(port, DISTRIBUTING);
1552 }
1553
1554 int
1555 rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
1556 {
1557         struct port *port;
1558         int err;
1559
1560         err = bond_8023ad_ext_validate(port_id, slave_id);
1561         if (err != 0)
1562                 return err;
1563
1564         port = &bond_mode_8023ad_ports[slave_id];
1565         return ACTOR_STATE(port, COLLECTING);
1566 }
1567
1568 int
1569 rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
1570                 struct rte_mbuf *lacp_pkt)
1571 {
1572         struct port *port;
1573         int res;
1574
1575         res = bond_8023ad_ext_validate(port_id, slave_id);
1576         if (res != 0)
1577                 return res;
1578
1579         port = &bond_mode_8023ad_ports[slave_id];
1580
1581         if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
1582                 return -EINVAL;
1583
1584         struct lacpdu_header *lacp;
1585
1586         /* only enqueue LACPDUs */
1587         lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
1588         if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP)
1589                 return -EINVAL;
1590
1591         MODE4_DEBUG("sending LACP frame\n");
1592
1593         return rte_ring_enqueue(port->tx_ring, lacp_pkt);
1594 }
1595
1596 static void
1597 bond_mode_8023ad_ext_periodic_cb(void *arg)
1598 {
1599         struct rte_eth_dev *bond_dev = arg;
1600         struct bond_dev_private *internals = bond_dev->data->dev_private;
1601         struct mode8023ad_private *mode4 = &internals->mode4;
1602         struct port *port;
1603         void *pkt = NULL;
1604         uint16_t i, slave_id;
1605
1606         for (i = 0; i < internals->active_slave_count; i++) {
1607                 slave_id = internals->active_slaves[i];
1608                 port = &bond_mode_8023ad_ports[slave_id];
1609
1610                 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
1611                         struct rte_mbuf *lacp_pkt = pkt;
1612                         struct lacpdu_header *lacp;
1613
1614                         lacp = rte_pktmbuf_mtod(lacp_pkt,
1615                                                 struct lacpdu_header *);
1616                         RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
1617
1618                         /* This is LACP frame so pass it to rx callback.
1619                          * Callback is responsible for freeing mbuf.
1620                          */
1621                         mode4->slowrx_cb(slave_id, lacp_pkt);
1622                 }
1623         }
1624
1625         rte_eal_alarm_set(internals->mode4.update_timeout_us,
1626                         bond_mode_8023ad_ext_periodic_cb, arg);
1627 }
1628
1629 int
1630 rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port)
1631 {
1632         int retval = 0;
1633         struct rte_eth_dev *dev = &rte_eth_devices[port];
1634         struct bond_dev_private *internals = (struct bond_dev_private *)
1635                 dev->data->dev_private;
1636
1637         if (check_for_bonded_ethdev(dev) != 0)
1638                 return -1;
1639
1640         if (bond_8023ad_slow_pkt_hw_filter_supported(port) != 0)
1641                 return -1;
1642
1643         /* Device must be stopped to set up slow queue */
1644         if (dev->data->dev_started)
1645                 return -1;
1646
1647         internals->mode4.dedicated_queues.enabled = 1;
1648
1649         bond_ethdev_mode_set(dev, internals->mode);
1650         return retval;
1651 }
1652
1653 int
1654 rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port)
1655 {
1656         int retval = 0;
1657         struct rte_eth_dev *dev = &rte_eth_devices[port];
1658         struct bond_dev_private *internals = (struct bond_dev_private *)
1659                 dev->data->dev_private;
1660
1661         if (check_for_bonded_ethdev(dev) != 0)
1662                 return -1;
1663
1664         /* Device must be stopped to set up slow queue */
1665         if (dev->data->dev_started)
1666                 return -1;
1667
1668         internals->mode4.dedicated_queues.enabled = 0;
1669
1670         bond_ethdev_mode_set(dev, internals->mode);
1671
1672         return retval;
1673 }