net/bonding: check code of promiscuous mode switch
[dpdk.git] / drivers / net / bonding / rte_eth_bond_8023ad.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <stddef.h>
6 #include <string.h>
7 #include <stdbool.h>
8
9 #include <rte_alarm.h>
10 #include <rte_malloc.h>
11 #include <rte_errno.h>
12 #include <rte_cycles.h>
13 #include <rte_compat.h>
14
15 #include "rte_eth_bond_private.h"
16
17 static void bond_mode_8023ad_ext_periodic_cb(void *arg);
18 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
19
20 #define MODE4_DEBUG(fmt, ...)                           \
21         rte_log(RTE_LOG_DEBUG, bond_logtype,            \
22                 "%6u [Port %u: %s] " fmt,               \
23                 bond_dbg_get_time_diff_ms(), slave_id,  \
24                 __func__, ##__VA_ARGS__)
25
26 static uint64_t start_time;
27
28 static unsigned
29 bond_dbg_get_time_diff_ms(void)
30 {
31         uint64_t now;
32
33         now = rte_rdtsc();
34         if (start_time == 0)
35                 start_time = now;
36
37         return ((now - start_time) * 1000) / rte_get_tsc_hz();
38 }
39
40 static void
41 bond_print_lacp(struct lacpdu *l)
42 {
43         char a_address[18];
44         char p_address[18];
45         char a_state[256] = { 0 };
46         char p_state[256] = { 0 };
47
48         static const char * const state_labels[] = {
49                 "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
50         };
51
52         int a_len = 0;
53         int p_len = 0;
54         uint8_t i;
55         uint8_t *addr;
56
57         addr = l->actor.port_params.system.addr_bytes;
58         snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
59                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
60
61         addr = l->partner.port_params.system.addr_bytes;
62         snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
63                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
64
65         for (i = 0; i < 8; i++) {
66                 if ((l->actor.state >> i) & 1) {
67                         a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
68                                 state_labels[i]);
69                 }
70
71                 if ((l->partner.state >> i) & 1) {
72                         p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
73                                 state_labels[i]);
74                 }
75         }
76
77         if (a_len && a_state[a_len-1] == ' ')
78                 a_state[a_len-1] = '\0';
79
80         if (p_len && p_state[p_len-1] == ' ')
81                 p_state[p_len-1] = '\0';
82
83         RTE_BOND_LOG(DEBUG,
84                      "LACP: {\n"
85                      "  subtype= %02X\n"
86                      "  ver_num=%02X\n"
87                      "  actor={ tlv=%02X, len=%02X\n"
88                      "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
89                      "       state={ %s }\n"
90                      "  }\n"
91                      "  partner={ tlv=%02X, len=%02X\n"
92                      "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"
93                      "       state={ %s }\n"
94                      "  }\n"
95                      "  collector={info=%02X, length=%02X, max_delay=%04X\n, "
96                      "type_term=%02X, terminator_length = %02X }",
97                      l->subtype,
98                      l->version_number,
99                      l->actor.tlv_type_info,
100                      l->actor.info_length,
101                      l->actor.port_params.system_priority,
102                      a_address,
103                      l->actor.port_params.key,
104                      l->actor.port_params.port_priority,
105                      l->actor.port_params.port_number,
106                      a_state,
107                      l->partner.tlv_type_info,
108                      l->partner.info_length,
109                      l->partner.port_params.system_priority,
110                      p_address,
111                      l->partner.port_params.key,
112                      l->partner.port_params.port_priority,
113                      l->partner.port_params.port_number,
114                      p_state,
115                      l->tlv_type_collector_info,
116                      l->collector_info_length,
117                      l->collector_max_delay,
118                      l->tlv_type_terminator,
119                      l->terminator_length);
120
121 }
122
123 #define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
124 #else
125 #define BOND_PRINT_LACP(lacpdu) do { } while (0)
126 #define MODE4_DEBUG(fmt, ...) do { } while (0)
127 #endif
128
129 static const struct rte_ether_addr lacp_mac_addr = {
130         .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
131 };
132
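/* Per-slave 802.3ad state, indexed by slave port id. */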
133 struct port bond_mode_8023ad_ports[RTE_MAX_ETHPORTS];
134
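/*
 * Timer helpers: timers are stored as absolute TSC deadlines.
 * A value of 0 means the timer is stopped; a non-zero value in the past
 * means it has expired.
 */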
135 static void
136 timer_cancel(uint64_t *timer)
137 {
138         *timer = 0;
139 }
140
141 static void
142 timer_set(uint64_t *timer, uint64_t timeout)
143 {
144         *timer = rte_rdtsc() + timeout;
145 }
146
147 /* Forces given timer to be in expired state. */
148 static void
149 timer_force_expired(uint64_t *timer)
150 {
151         *timer = rte_rdtsc();
152 }
153
154 static bool
155 timer_is_stopped(uint64_t *timer)
156 {
157         return *timer == 0;
158 }
159
160 static bool
161 timer_is_expired(uint64_t *timer)
162 {
163         return *timer < rte_rdtsc();
164 }
165
166 /* Timer is in running state if it is neither stopped nor expired */
167 static bool
168 timer_is_running(uint64_t *timer)
169 {
170         return !timer_is_stopped(timer) && !timer_is_expired(timer);
171 }
172
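/*
 * Atomically OR new warning flags into port->warnings_to_show. May be called
 * concurrently with show_warnings(), hence the compare-and-set loop.
 */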
173 static void
174 set_warning_flags(struct port *port, uint16_t flags)
175 {
176         int retval;
177         uint16_t old;
178         uint16_t new_flag = 0;
179
180         do {
181                 old = port->warnings_to_show;
182                 new_flag = old | flags;
183                 retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
184         } while (unlikely(retval == 0));
185 }
186
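/*
 * Drain and log the accumulated warning flags for a slave. Output is
 * rate limited by warning_timer (BOND_8023AD_WARNINGS_PERIOD_MS).
 */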
187 static void
188 show_warnings(uint16_t slave_id)
189 {
190         struct port *port = &bond_mode_8023ad_ports[slave_id];
191         uint8_t warnings;
192
193         do {
194                 warnings = port->warnings_to_show;
195         } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
196
197         if (!warnings)
198                 return;
199
200         if (!timer_is_expired(&port->warning_timer))
201                 return;
202
203
204         timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
205                         rte_get_tsc_hz() / 1000);
206
207         if (warnings & WRN_RX_QUEUE_FULL) {
208                 RTE_BOND_LOG(DEBUG,
209                              "Slave %u: failed to enqueue LACP packet into RX ring.\n"
210                              "Receive and transmit functions must be invoked on bonded "
211                              "interface at least 10 times per second or LACP will not work correctly",
212                              slave_id);
213         }
214
215         if (warnings & WRN_TX_QUEUE_FULL) {
216                 RTE_BOND_LOG(DEBUG,
217                              "Slave %u: failed to enqueue LACP packet into TX ring.\n"
218                              "Receive and transmit functions must be invoked on bonded "
219                              "interface at least 10 times per second or LACP will not work correctly",
220                              slave_id);
221         }
222
223         if (warnings & WRN_RX_MARKER_TO_FAST)
224                 RTE_BOND_LOG(INFO, "Slave %u: marker too early - ignoring.",
225                              slave_id);
226
227         if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
228                 RTE_BOND_LOG(INFO,
229                         "Slave %u: ignoring unknown slow protocol frame type",
230                              slave_id);
231         }
232
233         if (warnings & WRN_UNKNOWN_MARKER_TYPE)
234                 RTE_BOND_LOG(INFO, "Slave %u: ignoring unknown marker type",
235                              slave_id);
236
237         if (warnings & WRN_NOT_LACP_CAPABLE)
238                 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
239 }
240
241 static void
242 record_default(struct port *port)
243 {
244         /* Record default parameters for partner. Partner admin parameters
245          * are not implemented so set them to an arbitrary default (last known) and
246          * mark in the actor that the partner is in the defaulted state. */
247         port->partner_state = STATE_LACP_ACTIVE;
248         ACTOR_STATE_SET(port, DEFAULTED);
249 }
250
251 /** Function handles rx state machine.
252  *
253  * This function implements Receive State Machine from point 5.4.12 in
254  * 802.1AX documentation. It should be called periodically.
255  *
256  * @param lacpdu                LACPDU received.
257  * @param port                  Port on which LACPDU was received.
258  */
259 static void
260 rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
261                 struct lacpdu *lacp)
262 {
263         struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
264         uint64_t timeout;
265
266         if (SM_FLAG(port, BEGIN)) {
267                 /* Initialize stuff */
268                 MODE4_DEBUG("-> INITIALIZE\n");
269                 SM_FLAG_CLR(port, MOVED);
270                 port->selected = UNSELECTED;
271
272                 record_default(port);
273
274                 ACTOR_STATE_CLR(port, EXPIRED);
275                 timer_cancel(&port->current_while_timer);
276
277                 /* DISABLED: On initialization partner is out of sync */
278                 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
279
280                 /* LACP DISABLED stuff if LACP not enabled on this port */
281                 if (!SM_FLAG(port, LACP_ENABLED))
282                         PARTNER_STATE_CLR(port, AGGREGATION);
283                 else
284                         PARTNER_STATE_SET(port, AGGREGATION);
285         }
286
287         if (!SM_FLAG(port, LACP_ENABLED)) {
288                 /* Update parameters only if state changed */
289                 if (!timer_is_stopped(&port->current_while_timer)) {
290                         port->selected = UNSELECTED;
291                         record_default(port);
292                         PARTNER_STATE_CLR(port, AGGREGATION);
293                         ACTOR_STATE_CLR(port, EXPIRED);
294                         timer_cancel(&port->current_while_timer);
295                 }
296                 return;
297         }
298
299         if (lacp) {
300                 MODE4_DEBUG("LACP -> CURRENT\n");
301                 BOND_PRINT_LACP(lacp);
302                 /* Update selected flag. If partner parameters are defaulted assume they
303                  * match. If not defaulted, compare the LACP actor with the port's partner
304                  * params. */
305                 if (!ACTOR_STATE(port, DEFAULTED) &&
306                         (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
307                         || memcmp(&port->partner, &lacp->actor.port_params,
308                                 sizeof(port->partner)) != 0)) {
309                         MODE4_DEBUG("selected <- UNSELECTED\n");
310                         port->selected = UNSELECTED;
311                 }
312
313                 /* Record this PDU actor params as partner params */
314                 memcpy(&port->partner, &lacp->actor.port_params,
315                         sizeof(struct port_params));
316                 port->partner_state = lacp->actor.state;
317
318                 /* Partner parameters are not defaulted any more */
319                 ACTOR_STATE_CLR(port, DEFAULTED);
320
321                 /* If LACP partner params match this port actor params */
322                 agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
323                 bool match = port->actor.system_priority ==
324                         lacp->partner.port_params.system_priority &&
325                         rte_is_same_ether_addr(&agg->actor.system,
326                         &lacp->partner.port_params.system) &&
327                         port->actor.port_priority ==
328                         lacp->partner.port_params.port_priority &&
329                         port->actor.port_number ==
330                         lacp->partner.port_params.port_number;
331
332                 /* Update NTT if the partner's information is outdated (XORed and masked
333                  * bits are set) */
334                 uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
335                         STATE_SYNCHRONIZATION | STATE_AGGREGATION;
336
337                 if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
338                                 match == false) {
339                         SM_FLAG_SET(port, NTT);
340                 }
341
342                 /* If LACP partner params match this port actor params */
343                 if (match == true && ACTOR_STATE(port, AGGREGATION) ==
344                                 PARTNER_STATE(port, AGGREGATION))
345                         PARTNER_STATE_SET(port, SYNCHRONIZATION);
346                 else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
347                                 AGGREGATION))
348                         PARTNER_STATE_SET(port, SYNCHRONIZATION);
349                 else
350                         PARTNER_STATE_CLR(port, SYNCHRONIZATION);
351
352                 if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
353                         timeout = internals->mode4.short_timeout;
354                 else
355                         timeout = internals->mode4.long_timeout;
356
357                 timer_set(&port->current_while_timer, timeout);
358                 ACTOR_STATE_CLR(port, EXPIRED);
359                 return; /* No state change */
360         }
361
362         /* If CURRENT state timer is not running (stopped or expired)
363          * transit to EXPIRED state from DISABLED or CURRENT */
364         if (!timer_is_running(&port->current_while_timer)) {
365                 ACTOR_STATE_SET(port, EXPIRED);
366                 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
367                 PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
368                 timer_set(&port->current_while_timer, internals->mode4.short_timeout);
369         }
370 }
371
372 /**
373  * Function handles periodic tx state machine.
374  *
375  * Function implements Periodic Transmission state machine from point 5.4.13
376  * in 802.1AX documentation. It should be called periodically.
377  *
378  * @param port                  Port to handle state machine.
379  */
380 static void
381 periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
382 {
383         struct port *port = &bond_mode_8023ad_ports[slave_id];
384         /* Calculate if either side is LACP enabled */
385         uint64_t timeout;
386         uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
387                 PARTNER_STATE(port, LACP_ACTIVE);
388
389         uint8_t is_partner_fast, was_partner_fast;
390         /* No periodic on BEGIN, when LACP is DISABLED, or when both sides are passive */
391         if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
392                 timer_cancel(&port->periodic_timer);
393                 timer_force_expired(&port->tx_machine_timer);
394                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
395
396                 MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
397                         SM_FLAG(port, BEGIN) ? "begin " : "",
398                         SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
399                         active ? "LACP active " : "LACP passive ");
400                 return;
401         }
402
403         is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
404         was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
405
406         /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
407          * Otherwise check if the timer expired or the partner's settings changed. */
408         if (!timer_is_stopped(&port->periodic_timer)) {
409                 if (timer_is_expired(&port->periodic_timer)) {
410                         SM_FLAG_SET(port, NTT);
411                 } else if (is_partner_fast != was_partner_fast) {
412                         /* Partner's timeout was slow and now it is fast -> send LACP.
413                          * In the other case (was fast and now it is slow) just switch
414                          * timeout to slow without forcing a LACP send (because the standard
415                          * says so). */
416                         if (is_partner_fast)
417                                 SM_FLAG_SET(port, NTT);
418                 } else
419                         return; /* Nothing changed */
420         }
421
422         /* Handle state transition to FAST/SLOW LACP timeout */
423         if (is_partner_fast) {
424                 timeout = internals->mode4.fast_periodic_timeout;
425                 SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
426         } else {
427                 timeout = internals->mode4.slow_periodic_timeout;
428                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
429         }
430
431         timer_set(&port->periodic_timer, timeout);
432 }
433
434 /**
435  * Function handles mux state machine.
436  *
437  * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
438  * It should be called periodically.
439  *
440  * @param port                  Port to handle state machine.
441  */
442 static void
443 mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
444 {
445         struct port *port = &bond_mode_8023ad_ports[slave_id];
446
447         /* Mask of mux-related actor state bits */
448         const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
449                 STATE_COLLECTING;
450
451         /* Enter DETACHED state on BEGIN condition or from any other state if
452          * port was unselected */
453         if (SM_FLAG(port, BEGIN) ||
454                         port->selected == UNSELECTED || (port->selected == STANDBY &&
455                                 (port->actor_state & state_mask) != 0)) {
456                 /* detach mux from aggregator */
457                 port->actor_state &= ~state_mask;
458                 /* Set ntt to true on BEGIN condition or on transition from any other state,
459                  * which is indicated by wait_while_timer having been started */
460                 if (SM_FLAG(port, BEGIN) ||
461                                 !timer_is_stopped(&port->wait_while_timer)) {
462                         SM_FLAG_SET(port, NTT);
463                         MODE4_DEBUG("-> DETACHED\n");
464                 }
465                 timer_cancel(&port->wait_while_timer);
466         }
467
468         if (timer_is_stopped(&port->wait_while_timer)) {
469                 if (port->selected == SELECTED || port->selected == STANDBY) {
470                         timer_set(&port->wait_while_timer,
471                                 internals->mode4.aggregate_wait_timeout);
472
473                         MODE4_DEBUG("DETACHED -> WAITING\n");
474                 }
475                 /* Waiting state entered */
476                 return;
477         }
478
479         /* Transit to the next state if port is ready */
480         if (!timer_is_expired(&port->wait_while_timer))
481                 return;
482
483         if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
484                 !PARTNER_STATE(port, SYNCHRONIZATION)) {
485                 /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
486                  * sync, transit to ATTACHED state. */
487                 ACTOR_STATE_CLR(port, DISTRIBUTING);
488                 ACTOR_STATE_CLR(port, COLLECTING);
489                 /* Clear actor sync to trigger the transition to ATTACHED in the condition below */
490                 ACTOR_STATE_CLR(port, SYNCHRONIZATION);
491                 MODE4_DEBUG("Out of sync -> ATTACHED\n");
492         }
493
494         if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
495                 /* attach mux to aggregator */
496                 RTE_ASSERT((port->actor_state & (STATE_COLLECTING |
497                         STATE_DISTRIBUTING)) == 0);
498
499                 ACTOR_STATE_SET(port, SYNCHRONIZATION);
500                 SM_FLAG_SET(port, NTT);
501                 MODE4_DEBUG("ATTACHED Entered\n");
502         } else if (!ACTOR_STATE(port, COLLECTING)) {
503                 /* Start collecting if in sync */
504                 if (PARTNER_STATE(port, SYNCHRONIZATION)) {
505                         MODE4_DEBUG("ATTACHED -> COLLECTING\n");
506                         ACTOR_STATE_SET(port, COLLECTING);
507                         SM_FLAG_SET(port, NTT);
508                 }
509         } else if (ACTOR_STATE(port, COLLECTING)) {
510                 /* Check if partner is in COLLECTING state. If so this port can
511                  * distribute frames to it */
512                 if (!ACTOR_STATE(port, DISTRIBUTING)) {
513                         if (PARTNER_STATE(port, COLLECTING)) {
514                                 /* Enable DISTRIBUTING if partner is collecting */
515                                 ACTOR_STATE_SET(port, DISTRIBUTING);
516                                 SM_FLAG_SET(port, NTT);
517                                 MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
518                                 RTE_BOND_LOG(INFO,
519                                         "Bond %u: slave id %u distributing started.",
520                                         internals->port_id, slave_id);
521                         }
522                 } else {
523                         if (!PARTNER_STATE(port, COLLECTING)) {
524                                 /* Disable DISTRIBUTING (enter COLLECTING state) if partner
525                                  * is not collecting */
526                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
527                                 SM_FLAG_SET(port, NTT);
528                                 MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
529                                 RTE_BOND_LOG(INFO,
530                                         "Bond %u: slave id %u distributing stopped.",
531                                         internals->port_id, slave_id);
532                         }
533                 }
534         }
535 }
536
537 /**
538  * Function handles transmit state machine.
539  *
540  * Function implements Transmit Machine from point 5.4.16 in 802.1AX
541  * documentation.
542  *
543  * @param port
544  */
545 static void
546 tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
547 {
548         struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
549
550         struct rte_mbuf *lacp_pkt = NULL;
551         struct lacpdu_header *hdr;
552         struct lacpdu *lacpdu;
553
554         /* If the periodic timer is not running, the periodic machine is in NO PERIODIC
555          * and, according to the 802.1AX standard, the tx machine should not transmit
556          * any frames and should set ntt to false. */
557         if (timer_is_stopped(&port->periodic_timer))
558                 SM_FLAG_CLR(port, NTT);
559
560         if (!SM_FLAG(port, NTT))
561                 return;
562
563         if (!timer_is_expired(&port->tx_machine_timer))
564                 return;
565
566         lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
567         if (lacp_pkt == NULL) {
568                 RTE_BOND_LOG(ERR, "Failed to allocate LACP packet from pool");
569                 return;
570         }
571
572         lacp_pkt->data_len = sizeof(*hdr);
573         lacp_pkt->pkt_len = sizeof(*hdr);
574
575         hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
576
577         /* Source and destination MAC */
578         rte_ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
579         rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
580         hdr->eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_SLOW);
581
582         lacpdu = &hdr->lacpdu;
583         memset(lacpdu, 0, sizeof(*lacpdu));
584
585         /* Initialize LACP part */
586         lacpdu->subtype = SLOW_SUBTYPE_LACP;
587         lacpdu->version_number = 1;
588
589         /* ACTOR */
590         lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
591         lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
592         memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
593                         sizeof(port->actor));
594         agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
595         rte_ether_addr_copy(&agg->actor.system,
596                         &hdr->lacpdu.actor.port_params.system);
597         lacpdu->actor.state = port->actor_state;
598
599         /* PARTNER */
600         lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
601         lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
602         memcpy(&lacpdu->partner.port_params, &port->partner,
603                         sizeof(struct port_params));
604         lacpdu->partner.state = port->partner_state;
605
606         /* Other fields */
607         lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
608         lacpdu->collector_info_length = 0x10;
609         lacpdu->collector_max_delay = 0;
610
611         lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
612         lacpdu->terminator_length = 0;
613
614         MODE4_DEBUG("Sending LACP frame\n");
615         BOND_PRINT_LACP(lacpdu);
616
617         if (internals->mode4.dedicated_queues.enabled == 0) {
618                 int retval = rte_ring_enqueue(port->tx_ring, lacp_pkt);
619                 if (retval != 0) {
620                         /* If the TX ring is full, drop the packet and free the message.
621                            Retransmission will happen on the next function call. */
622                         rte_pktmbuf_free(lacp_pkt);
623                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
624                         return;
625                 }
626         } else {
627                 uint16_t pkts_sent = rte_eth_tx_burst(slave_id,
628                                 internals->mode4.dedicated_queues.tx_qid,
629                                 &lacp_pkt, 1);
630                 if (pkts_sent != 1) {
631                         rte_pktmbuf_free(lacp_pkt);
632                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
633                         return;
634                 }
635         }
636
637
638         timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
639         SM_FLAG_CLR(port, NTT);
640 }
641
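/* Return the index of the largest element of a[0..n-1]. */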
642 static uint8_t
643 max_index(uint64_t *a, int n)
644 {
645         if (n <= 0)
646                 return -1;
647
648         int i, max_i = 0;
649         uint64_t max = a[0];
650
651         for (i = 1; i < n; ++i) {
652                 if (a[i] > max) {
653                         max = a[i];
654                         max_i = i;
655                 }
656         }
657
658         return max_i;
659 }
660
661 /**
662  * Function assigns port to aggregator.
663  *
664  * @param internals             Pointer to bond_dev_private structure.
665  * @param slave_id              Slave port to assign to an aggregator.
666  */
667 static void
668 selection_logic(struct bond_dev_private *internals, uint16_t slave_id)
669 {
670         struct port *agg, *port;
671         uint16_t slaves_count, new_agg_id, i, j = 0;
672         uint16_t *slaves;
673         uint64_t agg_bandwidth[8] = {0};
674         uint64_t agg_count[8] = {0};
675         uint16_t default_slave = 0;
676         uint8_t mode_count_id, mode_band_id;
677         struct rte_eth_link link_info;
678
679         slaves = internals->active_slaves;
680         slaves_count = internals->active_slave_count;
681         port = &bond_mode_8023ad_ports[slave_id];
682
683         /* Search for aggregator suitable for this port */
684         for (i = 0; i < slaves_count; ++i) {
685                 agg = &bond_mode_8023ad_ports[slaves[i]];
686                 /* Skip ports that are not aggregators */
687                 if (agg->aggregator_port_id != slaves[i])
688                         continue;
689
690                 agg_count[agg->aggregator_port_id] += 1;
691                 rte_eth_link_get_nowait(slaves[i], &link_info);
692                 agg_bandwidth[agg->aggregator_port_id] += link_info.link_speed;
693
694                 /* Actor's system ID is not checked since all slave devices have the same
695                  * ID (MAC address). */
696                 if ((agg->actor.key == port->actor.key &&
697                         agg->partner.system_priority == port->partner.system_priority &&
698                         rte_is_same_ether_addr(&agg->partner.system,
699                                         &port->partner.system) == 1
700                         && (agg->partner.key == port->partner.key)) &&
701                         rte_is_zero_ether_addr(&port->partner.system) != 1 &&
702                         (agg->actor.key &
703                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
704
705                         if (j == 0)
706                                 default_slave = i;
707                         j++;
708                 }
709         }
710
711         switch (internals->mode4.agg_selection) {
712         case AGG_COUNT:
713                 mode_count_id = max_index(
714                                 (uint64_t *)agg_count, slaves_count);
715                 new_agg_id = mode_count_id;
716                 break;
717         case AGG_BANDWIDTH:
718                 mode_band_id = max_index(
719                                 (uint64_t *)agg_bandwidth, slaves_count);
720                 new_agg_id = mode_band_id;
721                 break;
722         case AGG_STABLE:
723                 if (default_slave == slaves_count)
724                         new_agg_id = slave_id;
725                 else
726                         new_agg_id = slaves[default_slave];
727                 break;
728         default:
729                 if (default_slave == slaves_count)
730                         new_agg_id = slave_id;
731                 else
732                         new_agg_id = slaves[default_slave];
733                 break;
734         }
735
736         if (new_agg_id != port->aggregator_port_id) {
737                 port->aggregator_port_id = new_agg_id;
738
739                 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
740                         "\t%s aggregator ID=%3u\n",
741                         port->aggregator_port_id,
742                         port->aggregator_port_id == slave_id ?
743                                 "aggregator not found, using default" : "aggregator found",
744                         port->aggregator_port_id);
745         }
746
747         port->selected = SELECTED;
748 }
749
750 /* Function maps DPDK speed to bonding speed stored in key field */
751 static uint16_t
752 link_speed_key(uint16_t speed) {
753         uint16_t key_speed;
754
755         switch (speed) {
756         case ETH_SPEED_NUM_NONE:
757                 key_speed = 0x00;
758                 break;
759         case ETH_SPEED_NUM_10M:
760                 key_speed = BOND_LINK_SPEED_KEY_10M;
761                 break;
762         case ETH_SPEED_NUM_100M:
763                 key_speed = BOND_LINK_SPEED_KEY_100M;
764                 break;
765         case ETH_SPEED_NUM_1G:
766                 key_speed = BOND_LINK_SPEED_KEY_1000M;
767                 break;
768         case ETH_SPEED_NUM_10G:
769                 key_speed = BOND_LINK_SPEED_KEY_10G;
770                 break;
771         case ETH_SPEED_NUM_20G:
772                 key_speed = BOND_LINK_SPEED_KEY_20G;
773                 break;
774         case ETH_SPEED_NUM_40G:
775                 key_speed = BOND_LINK_SPEED_KEY_40G;
776                 break;
777         default:
778                 /* Unknown speed */
779                 key_speed = 0xFFFF;
780         }
781
782         return key_speed;
783 }
784
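/*
 * Feed a received LACPDU (or NULL when nothing was received) into the RX
 * state machine. Frames whose partner system address does not match the
 * bonding device MAC are not passed to rx_machine; the mbuf is freed here.
 */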
785 static void
786 rx_machine_update(struct bond_dev_private *internals, uint16_t slave_id,
787                 struct rte_mbuf *lacp_pkt) {
788         struct lacpdu_header *lacp;
789         struct lacpdu_actor_partner_params *partner;
790
791         if (lacp_pkt != NULL) {
792                 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
793                 RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
794
795                 partner = &lacp->lacpdu.partner;
796                 if (rte_is_same_ether_addr(&partner->port_params.system,
797                         &internals->mode4.mac_addr)) {
798                         /* This LACP frame is sent to the bonding port
799                          * so pass it to rx_machine.
800                          */
801                         rx_machine(internals, slave_id, &lacp->lacpdu);
802                 }
803                 rte_pktmbuf_free(lacp_pkt);
804         } else
805                 rx_machine(internals, slave_id, NULL);
806 }
807
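/*
 * Main mode 4 periodic callback. Refreshes each active slave's actor key and
 * system address from its link status and MAC, runs the RX, periodic, mux and
 * TX state machines plus the selection logic, then re-arms itself with
 * rte_eal_alarm_set().
 */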
808 static void
809 bond_mode_8023ad_periodic_cb(void *arg)
810 {
811         struct rte_eth_dev *bond_dev = arg;
812         struct bond_dev_private *internals = bond_dev->data->dev_private;
813         struct port *port;
814         struct rte_eth_link link_info;
815         struct rte_ether_addr slave_addr;
816         struct rte_mbuf *lacp_pkt = NULL;
817         uint16_t slave_id;
818         uint16_t i;
819
820
821         /* Update link status on each port */
822         for (i = 0; i < internals->active_slave_count; i++) {
823                 uint16_t key;
824
825                 slave_id = internals->active_slaves[i];
826                 rte_eth_link_get_nowait(slave_id, &link_info);
827                 rte_eth_macaddr_get(slave_id, &slave_addr);
828
829                 if (link_info.link_status != 0) {
830                         key = link_speed_key(link_info.link_speed) << 1;
831                         if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
832                                 key |= BOND_LINK_FULL_DUPLEX_KEY;
833                 } else
834                         key = 0;
835
836                 port = &bond_mode_8023ad_ports[slave_id];
837
838                 key = rte_cpu_to_be_16(key);
839                 if (key != port->actor.key) {
840                         if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
841                                 set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
842
843                         port->actor.key = key;
844                         SM_FLAG_SET(port, NTT);
845                 }
846
847                 if (!rte_is_same_ether_addr(&port->actor.system, &slave_addr)) {
848                         rte_ether_addr_copy(&slave_addr, &port->actor.system);
849                         if (port->aggregator_port_id == slave_id)
850                                 SM_FLAG_SET(port, NTT);
851                 }
852         }
853
854         for (i = 0; i < internals->active_slave_count; i++) {
855                 slave_id = internals->active_slaves[i];
856                 port = &bond_mode_8023ad_ports[slave_id];
857
858                 if ((port->actor.key &
859                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
860
861                         SM_FLAG_SET(port, BEGIN);
862
863                         /* LACP is disabled on half duplex or link is down */
864                         if (SM_FLAG(port, LACP_ENABLED)) {
865                                 /* If port was enabled set it to BEGIN state */
866                                 SM_FLAG_CLR(port, LACP_ENABLED);
867                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
868                                 ACTOR_STATE_CLR(port, COLLECTING);
869                         }
870
871                         /* Skip this port processing */
872                         continue;
873                 }
874
875                 SM_FLAG_SET(port, LACP_ENABLED);
876
877                 if (internals->mode4.dedicated_queues.enabled == 0) {
878                         /* Find a LACP packet destined to this port. Do not check the subtype;
879                          * it was already checked by the function that queued the packet
880                          */
881                         int retval = rte_ring_dequeue(port->rx_ring,
882                                         (void **)&lacp_pkt);
883
884                         if (retval != 0)
885                                 lacp_pkt = NULL;
886
887                         rx_machine_update(internals, slave_id, lacp_pkt);
888                 } else {
889                         uint16_t rx_count = rte_eth_rx_burst(slave_id,
890                                         internals->mode4.dedicated_queues.rx_qid,
891                                         &lacp_pkt, 1);
892
893                         if (rx_count == 1)
894                                 bond_mode_8023ad_handle_slow_pkt(internals,
895                                                 slave_id, lacp_pkt);
896                         else
897                                 rx_machine_update(internals, slave_id, NULL);
898                 }
899
900                 periodic_machine(internals, slave_id);
901                 mux_machine(internals, slave_id);
902                 tx_machine(internals, slave_id);
903                 selection_logic(internals, slave_id);
904
905                 SM_FLAG_CLR(port, BEGIN);
906                 show_warnings(slave_id);
907         }
908
909         rte_eal_alarm_set(internals->mode4.update_timeout_us,
910                         bond_mode_8023ad_periodic_cb, arg);
911 }
912
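/*
 * Make sure the slave can receive frames sent to the LACP multicast address
 * (01:80:C2:00:00:02): try allmulticast first and fall back to promiscuous
 * mode, recording which mode was forced so it can be reverted later.
 * Returns 0 on success, -1 if neither mode could be enabled.
 */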
913 static int
914 bond_mode_8023ad_register_lacp_mac(uint16_t slave_id)
915 {
916         int ret;
917
918         rte_eth_allmulticast_enable(slave_id);
919         if (rte_eth_allmulticast_get(slave_id)) {
920                 RTE_BOND_LOG(DEBUG, "forced allmulti for port %u",
921                              slave_id);
922                 bond_mode_8023ad_ports[slave_id].forced_rx_flags =
923                                 BOND_8023AD_FORCED_ALLMULTI;
924                 return 0;
925         }
926
927         ret = rte_eth_promiscuous_enable(slave_id);
928         if (ret != 0) {
929                 RTE_BOND_LOG(ERR,
930                         "failed to enable promiscuous mode for port %u: %s",
931                         slave_id, rte_strerror(-ret));
932         }
933         if (rte_eth_promiscuous_get(slave_id)) {
934                 RTE_BOND_LOG(DEBUG, "forced promiscuous for port %u",
935                              slave_id);
936                 bond_mode_8023ad_ports[slave_id].forced_rx_flags =
937                                 BOND_8023AD_FORCED_PROMISC;
938                 return 0;
939         }
940
941         return -1;
942 }
943
944 static void
945 bond_mode_8023ad_unregister_lacp_mac(uint16_t slave_id)
946 {
947         int ret;
948
949         switch (bond_mode_8023ad_ports[slave_id].forced_rx_flags) {
950         case BOND_8023AD_FORCED_ALLMULTI:
951                 RTE_BOND_LOG(DEBUG, "unset allmulti for port %u", slave_id);
952                 rte_eth_allmulticast_disable(slave_id);
953                 break;
954
955         case BOND_8023AD_FORCED_PROMISC:
956                 RTE_BOND_LOG(DEBUG, "unset promisc for port %u", slave_id);
957                 ret = rte_eth_promiscuous_disable(slave_id);
958                 if (ret != 0)
959                         RTE_BOND_LOG(ERR,
960                                 "failed to disable promiscuous mode for port %u: %s",
961                                 slave_id, rte_strerror(-ret));
962                 break;
963
964         default:
965                 break;
966         }
967 }
968
969 void
970 bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
971                                 uint16_t slave_id)
972 {
973         struct bond_dev_private *internals = bond_dev->data->dev_private;
974
975         struct port *port = &bond_mode_8023ad_ports[slave_id];
976         struct port_params initial = {
977                         .system = { { 0 } },
978                         .system_priority = rte_cpu_to_be_16(0xFFFF),
979                         .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
980                         .port_priority = rte_cpu_to_be_16(0x00FF),
981                         .port_number = 0,
982         };
983
984         char mem_name[RTE_ETH_NAME_MAX_LEN];
985         int socket_id;
986         unsigned element_size;
987         uint32_t total_tx_desc;
988         struct bond_tx_queue *bd_tx_q;
989         uint16_t q_id;
990
991         /* Given slave must not be in the active list */
992         RTE_ASSERT(find_slave_by_id(internals->active_slaves,
993         internals->active_slave_count, slave_id) == internals->active_slave_count);
994         RTE_SET_USED(internals); /* used only for assert when enabled */
995
996         memcpy(&port->actor, &initial, sizeof(struct port_params));
997         /* Standard requires that port ID must be greater than 0.
998          * Add 1 to get the corresponding port_number */
999         port->actor.port_number = rte_cpu_to_be_16(slave_id + 1);
1000
1001         memcpy(&port->partner, &initial, sizeof(struct port_params));
1002
1003         /* default states */
1004         port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
1005         port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION;
1006         port->sm_flags = SM_FLAGS_BEGIN;
1007
1008         /* use this port as aggregator */
1009         port->aggregator_port_id = slave_id;
1010
1011         if (bond_mode_8023ad_register_lacp_mac(slave_id) < 0) {
1012                 RTE_BOND_LOG(WARNING, "slave %u is most likely broken and won't receive LACP packets",
1013                              slave_id);
1014         }
1015
1016         timer_cancel(&port->warning_timer);
1017
1018         if (port->mbuf_pool != NULL)
1019                 return;
1020
1021         RTE_ASSERT(port->rx_ring == NULL);
1022         RTE_ASSERT(port->tx_ring == NULL);
1023
1024         socket_id = rte_eth_dev_socket_id(slave_id);
1025         if (socket_id == (int)LCORE_ID_ANY)
1026                 socket_id = rte_socket_id();
1027
1028         element_size = sizeof(struct slow_protocol_frame) +
1029                                 RTE_PKTMBUF_HEADROOM;
1030
1031         /* The size of the mempool should be at least:
1032          * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
1033         total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
1034         for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
1035                 bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id];
1036                 total_tx_desc += bd_tx_q->nb_tx_desc;
1037         }
1038
1039         snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
1040         port->mbuf_pool = rte_pktmbuf_pool_create(mem_name, total_tx_desc,
1041                 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
1042                         32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
1043                 0, element_size, socket_id);
1044
1045         /* Any memory allocation failure in initialization is critical because
1046          * resources can't be freed, so reinitialization is impossible. */
1047         if (port->mbuf_pool == NULL) {
1048                 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1049                         slave_id, mem_name, rte_strerror(rte_errno));
1050         }
1051
1052         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
1053         port->rx_ring = rte_ring_create(mem_name,
1054                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
1055
1056         if (port->rx_ring == NULL) {
1057                 rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
1058                         mem_name, rte_strerror(rte_errno));
1059         }
1060
1061         /* TX ring is at least one pkt longer to make room for marker packet. */
1062         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
1063         port->tx_ring = rte_ring_create(mem_name,
1064                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
1065
1066         if (port->tx_ring == NULL) {
1067                 rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
1068                         mem_name, rte_strerror(rte_errno));
1069         }
1070 }
1071
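/*
 * Deactivate a slave: revert any forced promiscuous/allmulticast setting,
 * reset the port to the defaulted, unselected state and drain its rx/tx rings.
 */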
1072 int
1073 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
1074                 uint16_t slave_id)
1075 {
1076         void *pkt = NULL;
1077         struct port *port = NULL;
1078         uint8_t old_partner_state;
1079
1080         port = &bond_mode_8023ad_ports[slave_id];
1081
1082         ACTOR_STATE_CLR(port, AGGREGATION);
1083         port->selected = UNSELECTED;
1084
1085         old_partner_state = port->partner_state;
1086         record_default(port);
1087
1088         bond_mode_8023ad_unregister_lacp_mac(slave_id);
1089
1090         /* Cancel the current_while timer unless the partner timeout state changed */
1091         if (!((old_partner_state ^ port->partner_state) &
1092                         STATE_LACP_SHORT_TIMEOUT))
1093                 timer_cancel(&port->current_while_timer);
1094
1095         PARTNER_STATE_CLR(port, AGGREGATION);
1096         ACTOR_STATE_CLR(port, EXPIRED);
1097
1098         /* flush rx/tx rings */
1099         while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
1100                 rte_pktmbuf_free((struct rte_mbuf *)pkt);
1101
1102         while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
1103                 rte_pktmbuf_free((struct rte_mbuf *)pkt);
1104         return 0;
1105 }
1106
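/*
 * Propagate MAC address changes to the mode 4 state: stop the periodic
 * callback, refresh each active slave's actor system address, set NTT on
 * every port whose aggregator's address changed, then restart the callback
 * if the bonded device is started.
 */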
1107 void
1108 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
1109 {
1110         struct bond_dev_private *internals = bond_dev->data->dev_private;
1111         struct rte_ether_addr slave_addr;
1112         struct port *slave, *agg_slave;
1113         uint16_t slave_id, i, j;
1114
1115         bond_mode_8023ad_stop(bond_dev);
1116
1117         for (i = 0; i < internals->active_slave_count; i++) {
1118                 slave_id = internals->active_slaves[i];
1119                 slave = &bond_mode_8023ad_ports[slave_id];
1120                 rte_eth_macaddr_get(slave_id, &slave_addr);
1121
1122                 if (rte_is_same_ether_addr(&slave_addr, &slave->actor.system))
1123                         continue;
1124
1125                 rte_ether_addr_copy(&slave_addr, &slave->actor.system);
1126                 /* Do nothing if this port is not an aggregator. Otherwise
1127                  * set the NTT flag on every port that uses this aggregator. */
1128                 if (slave->aggregator_port_id != slave_id)
1129                         continue;
1130
1131                 for (j = 0; j < internals->active_slave_count; j++) {
1132                         agg_slave = &bond_mode_8023ad_ports[internals->active_slaves[j]];
1133                         if (agg_slave->aggregator_port_id == slave_id)
1134                                 SM_FLAG_SET(agg_slave, NTT);
1135                 }
1136         }
1137
1138         if (bond_dev->data->dev_started)
1139                 bond_mode_8023ad_start(bond_dev);
1140 }
1141
1142 static void
1143 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1144                 struct rte_eth_bond_8023ad_conf *conf)
1145 {
1146         struct bond_dev_private *internals = dev->data->dev_private;
1147         struct mode8023ad_private *mode4 = &internals->mode4;
1148         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1149
1150         conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1151         conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1152         conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1153         conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1154         conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1155         conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1156         conf->update_timeout_ms = mode4->update_timeout_us / 1000;
1157         conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
1158         conf->slowrx_cb = mode4->slowrx_cb;
1159         conf->agg_selection = mode4->agg_selection;
1160 }
1161
1162 static void
1163 bond_mode_8023ad_conf_get_default(struct rte_eth_bond_8023ad_conf *conf)
1164 {
1165         conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1166         conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1167         conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1168         conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1169         conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1170         conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1171         conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1172         conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
1173         conf->slowrx_cb = NULL;
1174         conf->agg_selection = AGG_STABLE;
1175 }
1176
1177 static void
1178 bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4,
1179                 struct rte_eth_bond_8023ad_conf *conf)
1180 {
1181         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1182
1183         mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1184         mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1185         mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1186         mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1187         mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1188         mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1189         mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1190         mode4->update_timeout_us = conf->update_timeout_ms * 1000;
1191
1192         mode4->dedicated_queues.enabled = 0;
1193         mode4->dedicated_queues.rx_qid = UINT16_MAX;
1194         mode4->dedicated_queues.tx_qid = UINT16_MAX;
1195 }
1196
1197 void
1198 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1199                 struct rte_eth_bond_8023ad_conf *conf)
1200 {
1201         struct rte_eth_bond_8023ad_conf def_conf;
1202         struct bond_dev_private *internals = dev->data->dev_private;
1203         struct mode8023ad_private *mode4 = &internals->mode4;
1204
1205         if (conf == NULL) {
1206                 conf = &def_conf;
1207                 bond_mode_8023ad_conf_get_default(conf);
1208         }
1209
1210         bond_mode_8023ad_stop(dev);
1211         bond_mode_8023ad_conf_assign(mode4, conf);
1212         mode4->slowrx_cb = conf->slowrx_cb;
1213         mode4->agg_selection = AGG_STABLE;
1214
1215         if (dev->data->dev_started)
1216                 bond_mode_8023ad_start(dev);
1217 }
1218
1219 int
1220 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1221 {
1222         struct bond_dev_private *internals = bond_dev->data->dev_private;
1223         uint16_t i;
1224
1225         for (i = 0; i < internals->active_slave_count; i++)
1226                 bond_mode_8023ad_activate_slave(bond_dev,
1227                                 internals->active_slaves[i]);
1228
1229         return 0;
1230 }
1231
1232 int
1233 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1234 {
1235         struct bond_dev_private *internals = bond_dev->data->dev_private;
1236         struct mode8023ad_private *mode4 = &internals->mode4;
1237         static const uint64_t us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
1238
1239         rte_eth_macaddr_get(internals->port_id, &mode4->mac_addr);
1240         if (mode4->slowrx_cb)
1241                 return rte_eal_alarm_set(us, &bond_mode_8023ad_ext_periodic_cb,
1242                                          bond_dev);
1243
1244         return rte_eal_alarm_set(us, &bond_mode_8023ad_periodic_cb, bond_dev);
1245 }
1246
1247 void
1248 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1249 {
1250         struct bond_dev_private *internals = bond_dev->data->dev_private;
1251         struct mode8023ad_private *mode4 = &internals->mode4;
1252
1253         if (mode4->slowrx_cb) {
1254                 rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb,
1255                                      bond_dev);
1256                 return;
1257         }
1258         rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
1259 }
1260
1261 void
1262 bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
1263                                   uint16_t slave_id, struct rte_mbuf *pkt)
1264 {
1265         struct mode8023ad_private *mode4 = &internals->mode4;
1266         struct port *port = &bond_mode_8023ad_ports[slave_id];
1267         struct marker_header *m_hdr;
1268         uint64_t marker_timer, old_marker_timer;
1269         int retval;
1270         uint8_t wrn, subtype;
1271         /* If the packet is a marker, send the response now by reusing the given
1272          * packet and updating only the source MAC; the destination MAC is multicast
1273          * so don't update it. Other frames will be handled later by the state machines. */
1274         subtype = rte_pktmbuf_mtod(pkt,
1275                         struct slow_protocol_frame *)->slow_protocol.subtype;
1276
1277         if (subtype == SLOW_SUBTYPE_MARKER) {
1278                 m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
1279
1280                 if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
1281                         wrn = WRN_UNKNOWN_MARKER_TYPE;
1282                         goto free_out;
1283                 }
1284
1285                 /* Setup marker timer. Do it in a loop in case of concurrent access. */
1286                 do {
1287                         old_marker_timer = port->rx_marker_timer;
1288                         if (!timer_is_expired(&old_marker_timer)) {
1289                                 wrn = WRN_RX_MARKER_TO_FAST;
1290                                 goto free_out;
1291                         }
1292
1293                         timer_set(&marker_timer, mode4->rx_marker_timeout);
1294                         retval = rte_atomic64_cmpset(&port->rx_marker_timer,
1295                                 old_marker_timer, marker_timer);
1296                 } while (unlikely(retval == 0));
1297
1298                 m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
1299                 rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
1300
1301                 if (internals->mode4.dedicated_queues.enabled == 0) {
1302                         int retval = rte_ring_enqueue(port->tx_ring, pkt);
1303                         if (retval != 0) {
1304                                 /* reset timer */
1305                                 port->rx_marker_timer = 0;
1306                                 wrn = WRN_TX_QUEUE_FULL;
1307                                 goto free_out;
1308                         }
1309                 } else {
1310                         /* Send packet directly to the slow queue */
1311                         uint16_t tx_count = rte_eth_tx_burst(slave_id,
1312                                         internals->mode4.dedicated_queues.tx_qid,
1313                                         &pkt, 1);
1314                         if (tx_count != 1) {
1315                                 /* reset timer */
1316                                 port->rx_marker_timer = 0;
1317                                 wrn = WRN_TX_QUEUE_FULL;
1318                                 goto free_out;
1319                         }
1320                 }
1321         } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
1322                 if (internals->mode4.dedicated_queues.enabled == 0) {
1323                         int retval = rte_ring_enqueue(port->rx_ring, pkt);
1324                         if (retval != 0) {
1325                                 /* If RX ring is full, free lacpdu message and drop packet */
1326                                 wrn = WRN_RX_QUEUE_FULL;
1327                                 goto free_out;
1328                         }
1329                 } else
1330                         rx_machine_update(internals, slave_id, pkt);
1331         } else {
1332                 wrn = WRN_UNKNOWN_SLOW_TYPE;
1333                 goto free_out;
1334         }
1335
1336         return;
1337
1338 free_out:
1339         set_warning_flags(port, wrn);
1340         rte_pktmbuf_free(pkt);
1341 }
1342
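/*
 * Retrieve the current 802.3ad (mode 4) configuration of the bonded device
 * identified by port_id. Returns 0 on success, or -EINVAL when port_id is
 * not a valid bonded port or conf is NULL.
 */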
1343 int
1344 rte_eth_bond_8023ad_conf_get(uint16_t port_id,
1345                 struct rte_eth_bond_8023ad_conf *conf)
1346 {
1347         struct rte_eth_dev *bond_dev;
1348
1349         if (valid_bonded_port_id(port_id) != 0)
1350                 return -EINVAL;
1351
1352         if (conf == NULL)
1353                 return -EINVAL;
1354
1355         bond_dev = &rte_eth_devices[port_id];
1356         bond_mode_8023ad_conf_get(bond_dev, conf);
1357         return 0;
1358 }
1359
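/*
 * Select the aggregator selection algorithm used in mode 4. Only valid for a
 * bonded device running in mode 4; note that values other than AGG_COUNT,
 * AGG_BANDWIDTH and AGG_STABLE are silently ignored and the call still
 * returns 0.
 */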
1360 int
1361 rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
1362                 enum rte_bond_8023ad_agg_selection agg_selection)
1363 {
1364         struct rte_eth_dev *bond_dev;
1365         struct bond_dev_private *internals;
1366         struct mode8023ad_private *mode4;
1367
1368         if (valid_bonded_port_id(port_id) != 0)
1369                 return -EINVAL;
1370
1371         bond_dev = &rte_eth_devices[port_id];
1372         internals = bond_dev->data->dev_private;
1373         if (internals->mode != BONDING_MODE_8023AD)
1374                 return -EINVAL;
1375
1376         mode4 = &internals->mode4;
1377         if (agg_selection == AGG_COUNT || agg_selection == AGG_BANDWIDTH
1378                         || agg_selection == AGG_STABLE)
1379                 mode4->agg_selection = agg_selection;
1380         return 0;
1381 }
1382
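/*
 * Return the aggregator selection algorithm currently configured for the
 * bonded device, or -EINVAL if the port is not a bonded device in mode 4.
 */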
1383 int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id)
1384 {
1385         struct rte_eth_dev *bond_dev;
1386         struct bond_dev_private *internals;
1387         struct mode8023ad_private *mode4;
1388
1389         if (valid_bonded_port_id(port_id) != 0)
1390                 return -EINVAL;
1391
1392         bond_dev = &rte_eth_devices[port_id];
1393         internals = bond_dev->data->dev_private;
1394         if (internals->mode != BONDING_MODE_8023AD)
1395                 return -EINVAL;
1396         mode4 = &internals->mode4;
1397
1398         return mode4->agg_selection;
1399 }
1400
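/*
 * Sanity-check a user supplied mode 4 configuration: all periods and
 * timeouts must be non-zero, the fast period must be shorter than the slow
 * period and the short timeout shorter than the long timeout. A NULL conf
 * is accepted and skips the check.
 */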
1403 static int
1404 bond_8023ad_setup_validate(uint16_t port_id,
1405                 struct rte_eth_bond_8023ad_conf *conf)
1406 {
1407         if (valid_bonded_port_id(port_id) != 0)
1408                 return -EINVAL;
1409
1410         if (conf != NULL) {
1411                 /* Basic sanity check */
1412                 if (conf->slow_periodic_ms == 0 ||
1413                                 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1414                                 conf->long_timeout_ms == 0 ||
1415                                 conf->short_timeout_ms >= conf->long_timeout_ms ||
1416                                 conf->aggregate_wait_timeout_ms == 0 ||
1417                                 conf->tx_period_ms == 0 ||
1418                                 conf->rx_marker_period_ms == 0 ||
1419                                 conf->update_timeout_ms == 0) {
1420                         RTE_BOND_LOG(ERR, "given mode 4 configuration is invalid");
1421                         return -EINVAL;
1422                 }
1423         }
1424
1425         return 0;
1426 }
1427
1428
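/*
 * Apply an 802.3ad configuration to the bonded device. The configuration is
 * validated first; a NULL conf is passed through to bond_mode_8023ad_setup()
 * unchanged.
 *
 * A minimal usage sketch from the application side; bond_port_id is assumed
 * to be a bonded device created elsewhere:
 *
 *	struct rte_eth_bond_8023ad_conf conf;
 *
 *	rte_eth_bond_8023ad_conf_get(bond_port_id, &conf);
 *	conf.aggregate_wait_timeout_ms = 2000;
 *	if (rte_eth_bond_8023ad_setup(bond_port_id, &conf) != 0)
 *		rte_exit(EXIT_FAILURE, "mode 4 setup failed\n");
 */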
1429 int
1430 rte_eth_bond_8023ad_setup(uint16_t port_id,
1431                 struct rte_eth_bond_8023ad_conf *conf)
1432 {
1433         struct rte_eth_dev *bond_dev;
1434         int err;
1435
1436         err = bond_8023ad_setup_validate(port_id, conf);
1437         if (err != 0)
1438                 return err;
1439
1440         bond_dev = &rte_eth_devices[port_id];
1441         bond_mode_8023ad_setup(bond_dev, conf);
1442
1443         return 0;
1444 }
1445
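/*
 * Copy the 802.3ad state of an active slave into *info: selection status,
 * actor/partner state and parameters, and the aggregator port id. Returns
 * -EINVAL if the port is not a bonded device in mode 4 or the slave is not
 * an active member.
 */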
1450 int
1451 rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
1452                 struct rte_eth_bond_8023ad_slave_info *info)
1453 {
1454         struct rte_eth_dev *bond_dev;
1455         struct bond_dev_private *internals;
1456         struct port *port;
1457
1458         if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1459                         rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1460                 return -EINVAL;
1461
1462         bond_dev = &rte_eth_devices[port_id];
1463
1464         internals = bond_dev->data->dev_private;
1465         if (find_slave_by_id(internals->active_slaves,
1466                         internals->active_slave_count, slave_id) ==
1467                                 internals->active_slave_count)
1468                 return -EINVAL;
1469
1470         port = &bond_mode_8023ad_ports[slave_id];
1471         info->selected = port->selected;
1472
1473         info->actor_state = port->actor_state;
1474         rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1475
1476         info->partner_state = port->partner_state;
1477         rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1478
1479         info->agg_port_id = port->aggregator_port_id;
1480         return 0;
1481 }
1482
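/*
 * Common validation for the external mode 4 API: the port must be a started
 * bonded device in mode 4, the slave must be an active member and an
 * external RX callback (slowrx_cb) must have been registered.
 */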
1483 static int
1484 bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id)
1485 {
1486         struct rte_eth_dev *bond_dev;
1487         struct bond_dev_private *internals;
1488         struct mode8023ad_private *mode4;
1489
1490         if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1491                 return -EINVAL;
1492
1493         bond_dev = &rte_eth_devices[port_id];
1494
1495         if (!bond_dev->data->dev_started)
1496                 return -EINVAL;
1497
1498         internals = bond_dev->data->dev_private;
1499         if (find_slave_by_id(internals->active_slaves,
1500                         internals->active_slave_count, slave_id) ==
1501                                 internals->active_slave_count)
1502                 return -EINVAL;
1503
1504         mode4 = &internals->mode4;
1505         if (mode4->slowrx_cb == NULL)
1506                 return -EINVAL;
1507
1508         return 0;
1509 }
1510
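/*
 * Set or clear the COLLECTING bit of the slave's actor state when the
 * external state machine is driving mode 4.
 */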
1511 int
1512 rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
1513                                 int enabled)
1514 {
1515         struct port *port;
1516         int res;
1517
1518         res = bond_8023ad_ext_validate(port_id, slave_id);
1519         if (res != 0)
1520                 return res;
1521
1522         port = &bond_mode_8023ad_ports[slave_id];
1523
1524         if (enabled)
1525                 ACTOR_STATE_SET(port, COLLECTING);
1526         else
1527                 ACTOR_STATE_CLR(port, COLLECTING);
1528
1529         return 0;
1530 }
1531
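/*
 * Same as rte_eth_bond_8023ad_ext_collect() but for the DISTRIBUTING bit of
 * the slave's actor state.
 */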
1532 int
1533 rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
1534                                 int enabled)
1535 {
1536         struct port *port;
1537         int res;
1538
1539         res = bond_8023ad_ext_validate(port_id, slave_id);
1540         if (res != 0)
1541                 return res;
1542
1543         port = &bond_mode_8023ad_ports[slave_id];
1544
1545         if (enabled)
1546                 ACTOR_STATE_SET(port, DISTRIBUTING);
1547         else
1548                 ACTOR_STATE_CLR(port, DISTRIBUTING);
1549
1550         return 0;
1551 }
1552
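/* Return nonzero if the DISTRIBUTING bit is set in the slave's actor state. */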
1553 int
1554 rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
1555 {
1556         struct port *port;
1557         int err;
1558
1559         err = bond_8023ad_ext_validate(port_id, slave_id);
1560         if (err != 0)
1561                 return err;
1562
1563         port = &bond_mode_8023ad_ports[slave_id];
1564         return ACTOR_STATE(port, DISTRIBUTING);
1565 }
1566
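/* Return nonzero if the COLLECTING bit is set in the slave's actor state. */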
1567 int
1568 rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
1569 {
1570         struct port *port;
1571         int err;
1572
1573         err = bond_8023ad_ext_validate(port_id, slave_id);
1574         if (err != 0)
1575                 return err;
1576
1577         port = &bond_mode_8023ad_ports[slave_id];
1578         return ACTOR_STATE(port, COLLECTING);
1579 }
1580
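/*
 * Enqueue an application supplied LACPDU on the slave's slow TX ring. The
 * mbuf must be at least sizeof(struct lacpdu_header) bytes long and carry
 * the LACP slow protocol subtype; anything else is rejected with -EINVAL.
 */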
1581 int
1582 rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
1583                 struct rte_mbuf *lacp_pkt)
1584 {
1585         struct port *port;
1586         int res;
1587
1588         res = bond_8023ad_ext_validate(port_id, slave_id);
1589         if (res != 0)
1590                 return res;
1591
1592         port = &bond_mode_8023ad_ports[slave_id];
1593
1594         if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
1595                 return -EINVAL;
1596
1597         struct lacpdu_header *lacp;
1598
1599         /* only enqueue LACPDUs */
1600         lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
1601         if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP)
1602                 return -EINVAL;
1603
1604         MODE4_DEBUG("sending LACP frame\n");
1605
1606         return rte_ring_enqueue(port->tx_ring, lacp_pkt);
1607 }
1608
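/*
 * Periodic alarm callback used when an external RX callback is registered:
 * dequeue at most one LACPDU from each active slave's RX ring, hand it to
 * slowrx_cb (which takes ownership of the mbuf) and re-arm the alarm.
 */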
1609 static void
1610 bond_mode_8023ad_ext_periodic_cb(void *arg)
1611 {
1612         struct rte_eth_dev *bond_dev = arg;
1613         struct bond_dev_private *internals = bond_dev->data->dev_private;
1614         struct mode8023ad_private *mode4 = &internals->mode4;
1615         struct port *port;
1616         void *pkt = NULL;
1617         uint16_t i, slave_id;
1618
1619         for (i = 0; i < internals->active_slave_count; i++) {
1620                 slave_id = internals->active_slaves[i];
1621                 port = &bond_mode_8023ad_ports[slave_id];
1622
1623                 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
1624                         struct rte_mbuf *lacp_pkt = pkt;
1625                         struct lacpdu_header *lacp;
1626
1627                         lacp = rte_pktmbuf_mtod(lacp_pkt,
1628                                                 struct lacpdu_header *);
1629                         RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
1630
1631                         /* This is an LACP frame, so pass it to the RX callback.
1632                          * The callback is responsible for freeing the mbuf.
1633                          */
1634                         mode4->slowrx_cb(slave_id, lacp_pkt);
1635                 }
1636         }
1637
1638         rte_eal_alarm_set(internals->mode4.update_timeout_us,
1639                         bond_mode_8023ad_ext_periodic_cb, arg);
1640 }
1641
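/*
 * Enable dedicated queues for LACP control traffic. Requires a stopped
 * bonded device for which bond_8023ad_slow_pkt_hw_filter_supported()
 * reports support; the bonding mode is then re-applied via
 * bond_ethdev_mode_set() so the dedicated RX/TX paths take effect.
 */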
1642 int
1643 rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port)
1644 {
1645         int retval = 0;
1646         struct rte_eth_dev *dev = &rte_eth_devices[port];
1647         struct bond_dev_private *internals = (struct bond_dev_private *)
1648                 dev->data->dev_private;
1649
1650         if (check_for_bonded_ethdev(dev) != 0)
1651                 return -1;
1652
1653         if (bond_8023ad_slow_pkt_hw_filter_supported(port) != 0)
1654                 return -1;
1655
1656         /* Device must be stopped to set up slow queue */
1657         if (dev->data->dev_started)
1658                 return -1;
1659
1660         internals->mode4.dedicated_queues.enabled = 1;
1661
1662         bond_ethdev_mode_set(dev, internals->mode);
1663         return retval;
1664 }
1665
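/*
 * Disable the dedicated LACP queues. The device must be stopped; the bonding
 * mode is re-applied via bond_ethdev_mode_set() after clearing the flag.
 */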
1666 int
1667 rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port)
1668 {
1669         int retval = 0;
1670         struct rte_eth_dev *dev = &rte_eth_devices[port];
1671         struct bond_dev_private *internals = (struct bond_dev_private *)
1672                 dev->data->dev_private;
1673
1674         if (check_for_bonded_ethdev(dev) != 0)
1675                 return -1;
1676
1677         /* Device must be stopped to set up slow queue */
1678         if (dev->data->dev_started)
1679                 return -1;
1680
1681         internals->mode4.dedicated_queues.enabled = 0;
1682
1683         bond_ethdev_mode_set(dev, internals->mode);
1684
1685         return retval;
1686 }