net/bonding: fix unicast packets filtering
dpdk.git: drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <stdbool.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

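/*
 * Skip over up to two stacked VLAN tags (single tag or QinQ) that follow
 * the Ethernet header and return the number of VLAN header bytes skipped.
 * *proto is updated in place to the EtherType of the encapsulated payload,
 * still in network byte order.
 */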
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

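/*
 * RX burst for round-robin mode: poll the active slaves one after another,
 * starting with the slave after the one polled first on the previous call,
 * until nb_pkts mbufs have been gathered or every slave has been polled.
 */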
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset into *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

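/*
 * Return non-zero when the received frame is an untagged slow protocol
 * frame (EtherType 0x8809) carrying either a LACPDU or a marker PDU;
 * VLAN tagged frames are never treated as LACP control traffic.
 */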
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director setup for mode 4 optimization
 */

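/*
 * rte_flow pattern and attributes matching every ingress frame whose
 * EtherType is the slow protocols type (0x8809), regardless of the MAC
 * addresses. Used to steer LACP control traffic to a dedicated queue on
 * each slave.
 */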
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

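/*
 * Check that a slave would accept the LACP steering flow rule and that it
 * exposes enough queues to host the extra dedicated control queue pair.
 */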
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that every slave in the bonding device supports hardware
         * filtering of slow protocol frames */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

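/*
 * RX burst for 802.3AD (mode 4). Packets are read round-robin from the
 * active slaves; a packet is then removed from the burst when:
 *  - it is a slow protocol frame and no dedicated RX queue is present
 *    (such frames are handed to the mode 4 state machine instead),
 *  - the slave it arrived on is not in the COLLECTING state, or
 *  - the bonded device is not promiscuous and the destination MAC is
 *    neither multicast nor the bonded device's own address.
 */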
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
                bool dedicated_rxq)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove packet from array if:
                         * - it is a slow packet but no dedicated rxq is present,
                         * - the slave is not in collecting state, or
                         * - the bonding interface is not in promiscuous mode,
                         *   the packet is not multicast and the address does
                         *   not match.
                         */
                        if (unlikely(
                                (!dedicated_rxq &&
                                 is_lacp_packets(hdr->ether_type, subtype,
                                                 bufs[j])) ||
                                !collecting ||
                                (!promisc &&
                                 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
                                 !rte_is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

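/*
 * RX burst for adaptive load balancing (mode 6): receive as in round-robin
 * mode, then hand every ARP frame to the ALB logic so the client table and
 * the slave assignment stay up to date.
 */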
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

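/*
 * TX burst for round-robin mode: spread the burst over the active slaves
 * one packet at a time, resuming from where the previous burst stopped.
 * Packets a slave could not accept are moved to the tail of bufs so the
 * caller can retry them.
 */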
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

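/*
 * Flow hash helpers for the balance and 802.3AD TX policies. Each helper
 * XOR-folds the relevant header words into a single value; the
 * burst_xmit_*_hash() functions below mix that value further and reduce it
 * modulo the slave count to pick an output slave per packet.
 */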
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

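/*
 * Layer 3+4 policy: combine the IP address hash with a hash of the TCP/UDP
 * ports. Fragmented IPv4 packets carry no L4 header, and for IPv4 the L4
 * header is only hashed when it lies within the first segment, so such
 * packets are balanced on the L3 hash alone.
 */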
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct rte_udp_hdr *udp_hdr;
        struct rte_tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct rte_tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct rte_udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

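/*
 * Mode 5 (TLB) keeps the active slaves sorted by remaining transmit
 * bandwidth. A periodic alarm callback samples each slave's TX byte
 * counter, estimates how much link capacity it has left and re-sorts
 * tlb_slaves_order so that transmission starts on the least loaded slave.
 */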
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

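/*
 * TX burst for TLB mode: walk the slaves in tlb_slaves_order and transmit
 * the remaining packets on each in turn. A packet whose source MAC is the
 * primary slave's address is rewritten to carry the MAC of the slave it
 * is actually sent on.
 */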
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

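/*
 * TX burst for ALB mode (mode 6): ARP packets are assigned to slaves via
 * the ALB client table and get their source MAC rewritten; ARP update
 * packets are generated when the client table changed; all other traffic
 * is transmitted with the TLB policy.
 */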
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

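/*
 * Common TX path for the balance and 802.3AD policies: hash every packet
 * onto one of the given slaves using the configured xmit policy, send the
 * per-slave bursts, and move packets that could not be transmitted to the
 * tail of bufs so the application can retry them.
 */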
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                 uint16_t *slave_port_ids, uint16_t slave_count)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave with a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting
         */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);
        return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
                                slave_count);
}

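/*
 * TX burst for 802.3AD (mode 4). Unless a dedicated control queue is used,
 * pending LACPDUs queued by the mode 4 state machine are drained from each
 * slave's tx_ring first; the data packets are then distributed through
 * tx_burst_balance(), but only across slaves in the DISTRIBUTING state.
 */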
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                bool dedicated_txq)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        uint16_t slave_tx_count;

        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        if (dedicated_txq)
                goto skip_tx_ring;

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))
                        continue;

                if (rte_ring_dequeue(port->tx_ring,
                                     (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

skip_tx_ring:
        if (unlikely(nb_bufs == 0))
                return 0;

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
                                dist_slave_count);
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}

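/*
 * TX burst for broadcast mode: every packet is sent on every active slave,
 * so each mbuf's reference count is first bumped by (slave count - 1). If
 * transmission fails anywhere, the surplus references of all but the most
 * successful slave are freed here, and only the most successful slave's
 * count is reported back to the caller.
 */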
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint8_t tx_failed_flag = 0;
        uint16_t num_of_slaves;

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* Record the count and index of the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* If slaves fail to transmit packets from the burst, the calling
         * application is not expected to know about multiple references to
         * packets, so we must free the failed packets of every slave except
         * the most successful one.
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                /**
                 * If in mode 4 then save the link properties of the first
                 * slave; all subsequent slaves must match these properties.
                 */
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                bond_link->link_autoneg = slave_link->link_autoneg;
                bond_link->link_duplex = slave_link->link_duplex;
                bond_link->link_speed = slave_link->link_speed;
        } else {
                /**
                 * In any other mode the link properties are set to default
                 * values of AUTONEG/DUPLEX
                 */
                ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
                ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
        }
}

static int
link_properties_valid(struct rte_eth_dev *ethdev,
                struct rte_eth_link *slave_link)
{
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                if (bond_link->link_duplex != slave_link->link_duplex ||
                        bond_link->link_autoneg != slave_link->link_autoneg ||
                        bond_link->link_speed != slave_link->link_speed)
                        return -1;
        }

        return 0;
}

1371 int
1372 mac_address_get(struct rte_eth_dev *eth_dev,
1373                 struct rte_ether_addr *dst_mac_addr)
1374 {
1375         struct rte_ether_addr *mac_addr;
1376
1377         if (eth_dev == NULL) {
1378                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1379                 return -1;
1380         }
1381
1382         if (dst_mac_addr == NULL) {
1383                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1384                 return -1;
1385         }
1386
1387         mac_addr = eth_dev->data->mac_addrs;
1388
1389         rte_ether_addr_copy(mac_addr, dst_mac_addr);
1390         return 0;
1391 }
1392
1393 int
1394 mac_address_set(struct rte_eth_dev *eth_dev,
1395                 struct rte_ether_addr *new_mac_addr)
1396 {
1397         struct rte_ether_addr *mac_addr;
1398
1399         if (eth_dev == NULL) {
1400                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1401                 return -1;
1402         }
1403
1404         if (new_mac_addr == NULL) {
1405                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1406                 return -1;
1407         }
1408
1409         mac_addr = eth_dev->data->mac_addrs;
1410
1411         /* If new MAC is different to current MAC then update */
1412         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1413                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1414
1415         return 0;
1416 }
1417
1418 static const struct rte_ether_addr null_mac_addr;
1419
1420 /*
1421  * Add additional MAC addresses to the slave
1422  */
1423 int
1424 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1425                 uint16_t slave_port_id)
1426 {
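        /* Entry 0 is the bonded device's own MAC, programmed on slaves as
         * their default address elsewhere; only the secondary addresses are
         * copied here, and a zeroed entry marks the end of the list. */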
1427         int i, ret;
1428         struct rte_ether_addr *mac_addr;
1429
1430         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1431                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1432                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1433                         break;
1434
1435                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1436                 if (ret < 0) {
1437                         /* rollback */
1438                         for (i--; i > 0; i--)
1439                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1440                                         &bonded_eth_dev->data->mac_addrs[i]);
1441                         return ret;
1442                 }
1443         }
1444
1445         return 0;
1446 }
1447
1448 /*
1449  * Remove additional MAC addresses from the slave
1450  */
1451 int
1452 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453                 uint16_t slave_port_id)
1454 {
1455         int i, rc, ret;
1456         struct rte_ether_addr *mac_addr;
1457
1458         rc = 0;
1459         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1460                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1461                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1462                         break;
1463
1464                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1465                 /* save only the first error */
1466                 if (ret < 0 && rc == 0)
1467                         rc = ret;
1468         }
1469
1470         return rc;
1471 }
1472
1473 int
1474 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1475 {
1476         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1477         int i;
1478
1479         /* Update slave devices MAC addresses */
1480         if (internals->slave_count < 1)
1481                 return -1;
1482
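        /* Round-robin, balance and broadcast slaves all carry the bonded MAC
         * so traffic is accepted on whichever slave receives it; in
         * active-backup, TLB and ALB only the current primary takes the
         * bonded MAC while the others keep their persisted addresses.
         * Mode 4 handles slave MACs itself. */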
1483         switch (internals->mode) {
1484         case BONDING_MODE_ROUND_ROBIN:
1485         case BONDING_MODE_BALANCE:
1486         case BONDING_MODE_BROADCAST:
1487                 for (i = 0; i < internals->slave_count; i++) {
1488                         if (rte_eth_dev_default_mac_addr_set(
1489                                         internals->slaves[i].port_id,
1490                                         bonded_eth_dev->data->mac_addrs)) {
1491                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1492                                                 internals->slaves[i].port_id);
1493                                 return -1;
1494                         }
1495                 }
1496                 break;
1497         case BONDING_MODE_8023AD:
1498                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1499                 break;
1500         case BONDING_MODE_ACTIVE_BACKUP:
1501         case BONDING_MODE_TLB:
1502         case BONDING_MODE_ALB:
1503         default:
1504                 for (i = 0; i < internals->slave_count; i++) {
1505                         if (internals->slaves[i].port_id ==
1506                                         internals->current_primary_port) {
1507                                 if (rte_eth_dev_default_mac_addr_set(
1508                                                 internals->current_primary_port,
1509                                                 bonded_eth_dev->data->mac_addrs)) {
1510                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1511                                                         internals->current_primary_port);
1512                                         return -1;
1513                                 }
1514                         } else {
1515                                 if (rte_eth_dev_default_mac_addr_set(
1516                                                 internals->slaves[i].port_id,
1517                                                 &internals->slaves[i].persisted_mac_addr)) {
1518                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1519                                                         internals->slaves[i].port_id);
1520                                         return -1;
1521                                 }
1522                         }
1523                 }
1524         }
1525
1526         return 0;
1527 }
1528
1529 int
1530 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1531 {
1532         struct bond_dev_private *internals;
1533
1534         internals = eth_dev->data->dev_private;
1535
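        /* Each mode installs its own burst handlers directly in the ethdev,
         * so the datapath needs no per-packet mode checks. */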
1536         switch (mode) {
1537         case BONDING_MODE_ROUND_ROBIN:
1538                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1539                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1540                 break;
1541         case BONDING_MODE_ACTIVE_BACKUP:
1542                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1543                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1544                 break;
1545         case BONDING_MODE_BALANCE:
1546                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1547                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1548                 break;
1549         case BONDING_MODE_BROADCAST:
1550                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1551                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1552                 break;
1553         case BONDING_MODE_8023AD:
1554                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1555                         return -1;
1556
1557                 if (internals->mode4.dedicated_queues.enabled == 0) {
1558                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1559                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1560                         RTE_BOND_LOG(WARNING,
1561                                 "Using mode 4, it is necessary to do TX burst "
1562                                 "and RX burst at least every 100ms.");
1563                 } else {
1564                         /* Use flow director's optimization */
1565                         eth_dev->rx_pkt_burst =
1566                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1567                         eth_dev->tx_pkt_burst =
1568                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1569                 }
1570                 break;
1571         case BONDING_MODE_TLB:
1572                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1573                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1574                 break;
1575         case BONDING_MODE_ALB:
1576                 if (bond_mode_alb_enable(eth_dev) != 0)
1577                         return -1;
1578
1579                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1580                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1581                 break;
1582         default:
1583                 return -1;
1584         }
1585
1586         internals->mode = mode;
1587
1588         return 0;
1589 }
1590
1591
1592 static int
1593 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1594                 struct rte_eth_dev *slave_eth_dev)
1595 {
1596         int errval = 0;
1597         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1598         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1599
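        /* The "slow" pool backs the dedicated Rx queue used exclusively for
         * LACP control frames when mode 4 dedicated queues are enabled. */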
1600         if (port->slow_pool == NULL) {
1601                 char mem_name[256];
1602                 int slave_id = slave_eth_dev->data->port_id;
1603
1604                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1605                                 slave_id);
1606                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1607                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1608                         slave_eth_dev->data->numa_node);
1609
1610                 /* Any memory allocation failure in initialization is critical because
1611                  * resources can't be freed, so reinitialization is impossible. */
1612                 if (port->slow_pool == NULL) {
1613                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1614                                 slave_id, mem_name, rte_strerror(rte_errno));
1615                 }
1616         }
1617
1618         if (internals->mode4.dedicated_queues.enabled == 1) {
1619                 /* Configure slow Rx queue */
1620
1621                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1622                                 internals->mode4.dedicated_queues.rx_qid, 128,
1623                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1624                                 NULL, port->slow_pool);
1625                 if (errval != 0) {
1626                         RTE_BOND_LOG(ERR,
1627                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1628                                         slave_eth_dev->data->port_id,
1629                                         internals->mode4.dedicated_queues.rx_qid,
1630                                         errval);
1631                         return errval;
1632                 }
1633
1634                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1635                                 internals->mode4.dedicated_queues.tx_qid, 512,
1636                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1637                                 NULL);
1638                 if (errval != 0) {
1639                         RTE_BOND_LOG(ERR,
1640                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1641                                 slave_eth_dev->data->port_id,
1642                                 internals->mode4.dedicated_queues.tx_qid,
1643                                 errval);
1644                         return errval;
1645                 }
1646         }
1647         return 0;
1648 }
1649
1650 int
1651 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1652                 struct rte_eth_dev *slave_eth_dev)
1653 {
1654         struct bond_rx_queue *bd_rx_q;
1655         struct bond_tx_queue *bd_tx_q;
1656         uint16_t nb_rx_queues;
1657         uint16_t nb_tx_queues;
1658
1659         int errval;
1660         uint16_t q_id;
1661         struct rte_flow_error flow_error;
1662
1663         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1664
1665         /* Stop slave */
1666         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1667
1668         /* Enable interrupts on slave device if supported */
1669         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1670                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1671
1672         /* If RSS is enabled for bonding, try to enable it for slaves  */
1673         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1674                 if (internals->rss_key_len != 0) {
1675                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1676                                         internals->rss_key_len;
1677                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1678                                         internals->rss_key;
1679                 } else {
1680                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1681                 }
1682
1683                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1684                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1685                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1686                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1687         }
1688
1689         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1690                         DEV_RX_OFFLOAD_VLAN_FILTER)
1691                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1692                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1693         else
1694                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1695                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1696
1697         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1698         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1699
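        /* Mode 4 with dedicated queues appends one extra Rx/Tx queue pair,
         * reserved for LACP control traffic, beyond the application's queues. */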
1700         if (internals->mode == BONDING_MODE_8023AD) {
1701                 if (internals->mode4.dedicated_queues.enabled == 1) {
1702                         nb_rx_queues++;
1703                         nb_tx_queues++;
1704                 }
1705         }
1706
1707         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1708                                      bonded_eth_dev->data->mtu);
1709         if (errval != 0 && errval != -ENOTSUP) {
1710                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1711                                 slave_eth_dev->data->port_id, errval);
1712                 return errval;
1713         }
1714
1715         /* Configure device */
1716         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1717                         nb_rx_queues, nb_tx_queues,
1718                         &(slave_eth_dev->data->dev_conf));
1719         if (errval != 0) {
1720                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1721                                 slave_eth_dev->data->port_id, errval);
1722                 return errval;
1723         }
1724
1725         /* Setup Rx Queues */
1726         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1727                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1728
1729                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1730                                 bd_rx_q->nb_rx_desc,
1731                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1732                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1733                 if (errval != 0) {
1734                         RTE_BOND_LOG(ERR,
1735                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1736                                         slave_eth_dev->data->port_id, q_id, errval);
1737                         return errval;
1738                 }
1739         }
1740
1741         /* Setup Tx Queues */
1742         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1743                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1744
1745                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1746                                 bd_tx_q->nb_tx_desc,
1747                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1748                                 &bd_tx_q->tx_conf);
1749                 if (errval != 0) {
1750                         RTE_BOND_LOG(ERR,
1751                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1752                                 slave_eth_dev->data->port_id, q_id, errval);
1753                         return errval;
1754                 }
1755         }
1756
1757         if (internals->mode == BONDING_MODE_8023AD &&
1758                         internals->mode4.dedicated_queues.enabled == 1) {
1759                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1760                 if (errval != 0)
1761                         return errval;
1762
1763                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1764                                 slave_eth_dev->data->port_id) != 0) {
1765                         RTE_BOND_LOG(ERR,
1766                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1767                                 slave_eth_dev->data->port_id);
1768                         return -1;
1769                 }
1770
1771                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1772                         rte_flow_destroy(slave_eth_dev->data->port_id,
1773                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1774                                         &flow_error);
1775
1776                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1777                                 slave_eth_dev->data->port_id);
1778         }
1779
1780         /* Start device */
1781         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1782         if (errval != 0) {
1783                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1784                                 slave_eth_dev->data->port_id, errval);
1785                 return -1;
1786         }
1787
1788         /* If RSS is enabled for bonding, synchronize RETA */
1789         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1790                 int i;
1791                 struct bond_dev_private *internals;
1792
1793                 internals = bonded_eth_dev->data->dev_private;
1794
1795                 for (i = 0; i < internals->slave_count; i++) {
1796                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1797                                 errval = rte_eth_dev_rss_reta_update(
1798                                                 slave_eth_dev->data->port_id,
1799                                                 &internals->reta_conf[0],
1800                                                 internals->slaves[i].reta_size);
1801                                 if (errval != 0) {
1802                                         RTE_BOND_LOG(WARNING,
1803                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1804                                                      " RSS Configuration for bonding may be inconsistent.",
1805                                                      slave_eth_dev->data->port_id, errval);
1806                                 }
1807                                 break;
1808                         }
1809                 }
1810         }
1811
1812         /* If lsc interrupt is set, check initial slave's link status */
1813         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1814                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1815                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1816                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1817                         NULL);
1818         }
1819
1820         return 0;
1821 }
1822
1823 void
1824 slave_remove(struct bond_dev_private *internals,
1825                 struct rte_eth_dev *slave_eth_dev)
1826 {
1827         uint16_t i;
1828
1829         for (i = 0; i < internals->slave_count; i++)
1830                 if (internals->slaves[i].port_id ==
1831                                 slave_eth_dev->data->port_id)
1832                         break;
1833
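        /* Compact the slave array and each flow's per-slave handle array so
         * that indexes stay aligned after the removal. */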
1834         if (i < (internals->slave_count - 1)) {
1835                 struct rte_flow *flow;
1836
1837                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1838                                 sizeof(internals->slaves[0]) *
1839                                 (internals->slave_count - i - 1));
1840                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1841                         memmove(&flow->flows[i], &flow->flows[i + 1],
1842                                 sizeof(flow->flows[0]) *
1843                                 (internals->slave_count - i - 1));
1844                         flow->flows[internals->slave_count - 1] = NULL;
1845                 }
1846         }
1847
1848         internals->slave_count--;
1849
1850         /* force reconfiguration of slave interfaces */
1851         _rte_eth_dev_reset(slave_eth_dev);
1852 }
1853
1854 static void
1855 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1856
1857 void
1858 slave_add(struct bond_dev_private *internals,
1859                 struct rte_eth_dev *slave_eth_dev)
1860 {
1861         struct bond_slave_details *slave_details =
1862                         &internals->slaves[internals->slave_count];
1863
1864         slave_details->port_id = slave_eth_dev->data->port_id;
1865         slave_details->last_link_status = 0;
1866
1867         /* Mark slave devices that don't support interrupts so we can
1868          * compensate when we start the bond
1869          */
1870         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1871                 slave_details->link_status_poll_enabled = 1;
1872         }
1873
1874         slave_details->link_status_wait_to_complete = 0;
1875         /* save the slave's current MAC so it can be restored when it leaves the bond */
1876         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1877                         sizeof(struct rte_ether_addr));
1878 }
1879
1880 void
1881 bond_ethdev_primary_set(struct bond_dev_private *internals,
1882                 uint16_t slave_port_id)
1883 {
1884         int i;
1885
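        /* The proposed primary takes effect only if no slaves are active yet
         * or if it is present in the active slave list. */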
1886         if (internals->active_slave_count < 1)
1887                 internals->current_primary_port = slave_port_id;
1888         else
1889                 /* Search bonded device slave ports for new proposed primary port */
1890                 for (i = 0; i < internals->active_slave_count; i++) {
1891                         if (internals->active_slaves[i] == slave_port_id)
1892                                 internals->current_primary_port = slave_port_id;
1893                 }
1894 }
1895
1896 static void
1897 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1898
1899 static int
1900 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1901 {
1902         struct bond_dev_private *internals;
1903         int i;
1904
1905         /* slave eth dev will be started by bonded device */
1906         if (check_for_bonded_ethdev(eth_dev)) {
1907                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1908                                 eth_dev->data->port_id);
1909                 return -1;
1910         }
1911
1912         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1913         eth_dev->data->dev_started = 1;
1914
1915         internals = eth_dev->data->dev_private;
1916
1917         if (internals->slave_count == 0) {
1918                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1919                 goto out_err;
1920         }
1921
1922         if (internals->user_defined_mac == 0) {
1923                 struct rte_ether_addr *new_mac_addr = NULL;
1924
1925                 for (i = 0; i < internals->slave_count; i++)
1926                         if (internals->slaves[i].port_id == internals->primary_port)
1927                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1928
1929                 if (new_mac_addr == NULL)
1930                         goto out_err;
1931
1932                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1933                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1934                                         eth_dev->data->port_id);
1935                         goto out_err;
1936                 }
1937         }
1938
1939         /* If bonded device is configured in promiscuous mode then re-apply config */
1940         if (internals->promiscuous_en)
1941                 bond_ethdev_promiscuous_enable(eth_dev);
1942
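        /* The dedicated control queues take the ids just past the data
         * queues; slave_configure() sets these extra queues up per slave. */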
1943         if (internals->mode == BONDING_MODE_8023AD) {
1944                 if (internals->mode4.dedicated_queues.enabled == 1) {
1945                         internals->mode4.dedicated_queues.rx_qid =
1946                                         eth_dev->data->nb_rx_queues;
1947                         internals->mode4.dedicated_queues.tx_qid =
1948                                         eth_dev->data->nb_tx_queues;
1949                 }
1950         }
1951
1952
1953         /* Reconfigure each slave device if starting bonded device */
1954         for (i = 0; i < internals->slave_count; i++) {
1955                 struct rte_eth_dev *slave_ethdev =
1956                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1957                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1958                         RTE_BOND_LOG(ERR,
1959                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1960                                 eth_dev->data->port_id,
1961                                 internals->slaves[i].port_id);
1962                         goto out_err;
1963                 }
1964                 /* We will need to poll for link status if any slave doesn't
1965                  * support interrupts
1966                  */
1967                 if (internals->slaves[i].link_status_poll_enabled)
1968                         internals->link_status_polling_enabled = 1;
1969         }
1970
1971         /* start polling if needed */
1972         if (internals->link_status_polling_enabled) {
1973                 rte_eal_alarm_set(
1974                         internals->link_status_polling_interval_ms * 1000,
1975                         bond_ethdev_slave_link_status_change_monitor,
1976                         (void *)&rte_eth_devices[internals->port_id]);
1977         }
1978
1979         /* Update all slave devices MACs*/
1980         if (mac_address_slaves_update(eth_dev) != 0)
1981                 goto out_err;
1982
1983         if (internals->user_defined_primary_port)
1984                 bond_ethdev_primary_set(internals, internals->primary_port);
1985
1986         if (internals->mode == BONDING_MODE_8023AD)
1987                 bond_mode_8023ad_start(eth_dev);
1988
1989         if (internals->mode == BONDING_MODE_TLB ||
1990                         internals->mode == BONDING_MODE_ALB)
1991                 bond_tlb_enable(internals);
1992
1993         return 0;
1994
1995 out_err:
1996         eth_dev->data->dev_started = 0;
1997         return -1;
1998 }
1999
2000 static void
2001 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2002 {
2003         uint16_t i;
2004
2005         if (dev->data->rx_queues != NULL) {
2006                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2007                         rte_free(dev->data->rx_queues[i]);
2008                         dev->data->rx_queues[i] = NULL;
2009                 }
2010                 dev->data->nb_rx_queues = 0;
2011         }
2012
2013         if (dev->data->tx_queues != NULL) {
2014                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2015                         rte_free(dev->data->tx_queues[i]);
2016                         dev->data->tx_queues[i] = NULL;
2017                 }
2018                 dev->data->nb_tx_queues = 0;
2019         }
2020 }
2021
2022 void
2023 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2024 {
2025         struct bond_dev_private *internals = eth_dev->data->dev_private;
2026         uint16_t i;
2027
2028         if (internals->mode == BONDING_MODE_8023AD) {
2029                 struct port *port;
2030                 void *pkt = NULL;
2031
2032                 bond_mode_8023ad_stop(eth_dev);
2033
2034                 /* Discard all messages to/from mode 4 state machines */
2035                 for (i = 0; i < internals->active_slave_count; i++) {
2036                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2037
2038                         RTE_ASSERT(port->rx_ring != NULL);
2039                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2040                                 rte_pktmbuf_free(pkt);
2041
2042                         RTE_ASSERT(port->tx_ring != NULL);
2043                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2044                                 rte_pktmbuf_free(pkt);
2045                 }
2046         }
2047
2048         if (internals->mode == BONDING_MODE_TLB ||
2049                         internals->mode == BONDING_MODE_ALB) {
2050                 bond_tlb_disable(internals);
2051                 for (i = 0; i < internals->active_slave_count; i++)
2052                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2053         }
2054
2055         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2056         eth_dev->data->dev_started = 0;
2057
2058         internals->link_status_polling_enabled = 0;
2059         for (i = 0; i < internals->slave_count; i++) {
2060                 uint16_t slave_id = internals->slaves[i].port_id;
2061                 if (find_slave_by_id(internals->active_slaves,
2062                                 internals->active_slave_count, slave_id) !=
2063                                                 internals->active_slave_count) {
2064                         internals->slaves[i].last_link_status = 0;
2065                         rte_eth_dev_stop(slave_id);
2066                         deactivate_slave(eth_dev, slave_id);
2067                 }
2068         }
2069 }
2070
2071 void
2072 bond_ethdev_close(struct rte_eth_dev *dev)
2073 {
2074         struct bond_dev_private *internals = dev->data->dev_private;
2075         uint16_t bond_port_id = internals->port_id;
2076         int skipped = 0;
2077         struct rte_flow_error ferror;
2078
2079         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
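        /* Remove slaves one by one; "skipped" advances past any slave that
         * fails to detach so this loop cannot spin forever. */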
2080         while (internals->slave_count != skipped) {
2081                 uint16_t port_id = internals->slaves[skipped].port_id;
2082
2083                 rte_eth_dev_stop(port_id);
2084
2085                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2086                         RTE_BOND_LOG(ERR,
2087                                      "Failed to remove port %d from bonded device %s",
2088                                      port_id, dev->device->name);
2089                         skipped++;
2090                 }
2091         }
2092         bond_flow_ops.flush(dev, &ferror);
2093         bond_ethdev_free_queues(dev);
2094         rte_bitmap_reset(internals->vlan_filter_bmp);
2095 }
2096
2097 /* forward declaration */
2098 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2099
2100 static void
2101 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2102 {
2103         struct bond_dev_private *internals = dev->data->dev_private;
2104
2105         uint16_t max_nb_rx_queues = UINT16_MAX;
2106         uint16_t max_nb_tx_queues = UINT16_MAX;
2107         uint16_t max_rx_desc_lim = UINT16_MAX;
2108         uint16_t max_tx_desc_lim = UINT16_MAX;
2109
2110         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2111
2112         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2113                         internals->candidate_max_rx_pktlen :
2114                         RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2115
2116         /* Max number of tx/rx queues that the bonded device can support is
2117          * the minimum of those values across all slaves, as every slave must
2118          * be able to support the bonded device's queue configuration.
2119          */
2120         if (internals->slave_count > 0) {
2121                 struct rte_eth_dev_info slave_info;
2122                 uint16_t idx;
2123
2124                 for (idx = 0; idx < internals->slave_count; idx++) {
2125                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2126                                         &slave_info);
2127
2128                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2129                                 max_nb_rx_queues = slave_info.max_rx_queues;
2130
2131                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2132                                 max_nb_tx_queues = slave_info.max_tx_queues;
2133
2134                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2135                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2136
2137                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2138                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2139                 }
2140         }
2141
2142         dev_info->max_rx_queues = max_nb_rx_queues;
2143         dev_info->max_tx_queues = max_nb_tx_queues;
2144
2145         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2146                sizeof(dev_info->default_rxconf));
2147         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2148                sizeof(dev_info->default_txconf));
2149
2150         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2151         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2152
2153         /**
2154          * If dedicated hw queues enabled for link bonding device in LACP mode
2155          * then we need to reduce the maximum number of data path queues by 1.
2156          */
2157         if (internals->mode == BONDING_MODE_8023AD &&
2158                 internals->mode4.dedicated_queues.enabled == 1) {
2159                 dev_info->max_rx_queues--;
2160                 dev_info->max_tx_queues--;
2161         }
2162
2163         dev_info->min_rx_bufsize = 0;
2164
2165         dev_info->rx_offload_capa = internals->rx_offload_capa;
2166         dev_info->tx_offload_capa = internals->tx_offload_capa;
2167         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2168         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2169         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2170
2171         dev_info->reta_size = internals->reta_size;
2172 }
2173
2174 static int
2175 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2176 {
2177         int res;
2178         uint16_t i;
2179         struct bond_dev_private *internals = dev->data->dev_private;
2180
2181         /* don't do this while a slave is being added */
2182         rte_spinlock_lock(&internals->lock);
2183
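        /* Record the filter in the bond's bitmap, from which it can be
         * replayed on slaves added later, then propagate it to the current
         * slaves. */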
2184         if (on)
2185                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2186         else
2187                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2188
2189         for (i = 0; i < internals->slave_count; i++) {
2190                 uint16_t port_id = internals->slaves[i].port_id;
2191
2192                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2193                 if (res == -ENOTSUP)
2194                         RTE_BOND_LOG(WARNING,
2195                                      "Setting VLAN filter on slave port %u not supported.",
2196                                      port_id);
2197         }
2198
2199         rte_spinlock_unlock(&internals->lock);
2200         return 0;
2201 }
2202
2203 static int
2204 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2205                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2206                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2207 {
2208         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2209                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2210                                         0, dev->data->numa_node);
2211         if (bd_rx_q == NULL)
2212                 return -1;
2213
2214         bd_rx_q->queue_id = rx_queue_id;
2215         bd_rx_q->dev_private = dev->data->dev_private;
2216
2217         bd_rx_q->nb_rx_desc = nb_rx_desc;
2218
2219         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2220         bd_rx_q->mb_pool = mb_pool;
2221
2222         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2223
2224         return 0;
2225 }
2226
2227 static int
2228 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2229                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2230                 const struct rte_eth_txconf *tx_conf)
2231 {
2232         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2233                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2234                                         0, dev->data->numa_node);
2235
2236         if (bd_tx_q == NULL)
2237                 return -1;
2238
2239         bd_tx_q->queue_id = tx_queue_id;
2240         bd_tx_q->dev_private = dev->data->dev_private;
2241
2242         bd_tx_q->nb_tx_desc = nb_tx_desc;
2243         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2244
2245         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2246
2247         return 0;
2248 }
2249
2250 static void
2251 bond_ethdev_rx_queue_release(void *queue)
2252 {
2253         if (queue == NULL)
2254                 return;
2255
2256         rte_free(queue);
2257 }
2258
2259 static void
2260 bond_ethdev_tx_queue_release(void *queue)
2261 {
2262         if (queue == NULL)
2263                 return;
2264
2265         rte_free(queue);
2266 }
2267
2268 static void
2269 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2270 {
2271         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2272         struct bond_dev_private *internals;
2273
2274         /* Default value for polling slave found is true as we don't want to
2275          * disable the polling thread if we cannot get the lock */
2276         int i, polling_slave_found = 1;
2277
2278         if (cb_arg == NULL)
2279                 return;
2280
2281         bonded_ethdev = cb_arg;
2282         internals = bonded_ethdev->data->dev_private;
2283
2284         if (!bonded_ethdev->data->dev_started ||
2285                 !internals->link_status_polling_enabled)
2286                 return;
2287
2288         /* If device is currently being configured then don't check slaves link
2289          * status, wait until next period */
2290         if (rte_spinlock_trylock(&internals->lock)) {
2291                 if (internals->slave_count > 0)
2292                         polling_slave_found = 0;
2293
2294                 for (i = 0; i < internals->slave_count; i++) {
2295                         if (!internals->slaves[i].link_status_poll_enabled)
2296                                 continue;
2297
2298                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2299                         polling_slave_found = 1;
2300
2301                         /* Update slave link status */
2302                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2303                                         internals->slaves[i].link_status_wait_to_complete);
2304
2305                         /* if link status has changed since last checked then call lsc
2306                          * event callback */
2307                         if (slave_ethdev->data->dev_link.link_status !=
2308                                         internals->slaves[i].last_link_status) {
2309                                 internals->slaves[i].last_link_status =
2310                                                 slave_ethdev->data->dev_link.link_status;
2311
2312                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2313                                                 RTE_ETH_EVENT_INTR_LSC,
2314                                                 &bonded_ethdev->data->port_id,
2315                                                 NULL);
2316                         }
2317                 }
2318                 rte_spinlock_unlock(&internals->lock);
2319         }
2320
2321         if (polling_slave_found)
2322                 /* Set alarm to continue monitoring link status of slave ethdev's */
2323                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2324                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2325 }
2326
2327 static int
2328 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2329 {
2330         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2331
2332         struct bond_dev_private *bond_ctx;
2333         struct rte_eth_link slave_link;
2334
2335         uint32_t idx;
2336
2337         bond_ctx = ethdev->data->dev_private;
2338
2339         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2340
2341         if (ethdev->data->dev_started == 0 ||
2342                         bond_ctx->active_slave_count == 0) {
2343                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2344                 return 0;
2345         }
2346
2347         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2348
2349         if (wait_to_complete)
2350                 link_update = rte_eth_link_get;
2351         else
2352                 link_update = rte_eth_link_get_nowait;
2353
2354         switch (bond_ctx->mode) {
2355         case BONDING_MODE_BROADCAST:
2356                 /**
2357                  * Setting link speed to UINT32_MAX to ensure we pick up the
2358                  * value of the first active slave
2359                  */
2360                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2361
2362                 /**
2363                  * Link speed is the minimum of all the slaves' link speeds,
2364                  * as packet loss will occur on a slave if transmission at a
2365                  * rate greater than its own speed is attempted
2366                  */
2367                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2368                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2369
2370                         if (slave_link.link_speed <
2371                                         ethdev->data->dev_link.link_speed)
2372                                 ethdev->data->dev_link.link_speed =
2373                                                 slave_link.link_speed;
2374                 }
2375                 break;
2376         case BONDING_MODE_ACTIVE_BACKUP:
2377                 /* Current primary slave */
2378                 link_update(bond_ctx->current_primary_port, &slave_link);
2379
2380                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2381                 break;
2382         case BONDING_MODE_8023AD:
2383                 ethdev->data->dev_link.link_autoneg =
2384                                 bond_ctx->mode4.slave_link.link_autoneg;
2385                 ethdev->data->dev_link.link_duplex =
2386                                 bond_ctx->mode4.slave_link.link_duplex;
2387                 /* fall through to update link speed */
2388         case BONDING_MODE_ROUND_ROBIN:
2389         case BONDING_MODE_BALANCE:
2390         case BONDING_MODE_TLB:
2391         case BONDING_MODE_ALB:
2392         default:
2393                 /**
2394                  * In these modes the maximum theoretical link speed is the
2395                  * sum of all the slaves' link speeds
2396                  */
2397                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2398
2399                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2400                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2401
2402                         ethdev->data->dev_link.link_speed +=
2403                                         slave_link.link_speed;
2404                 }
2405         }
2406
2407
2408         return 0;
2409 }
2410
2411
2412 static int
2413 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2414 {
2415         struct bond_dev_private *internals = dev->data->dev_private;
2416         struct rte_eth_stats slave_stats;
2417         int i, j;
2418
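        /* Bonded stats are the straight sum over all slaves; the per-queue
         * counters assume slave queue j maps to bonded queue j. */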
2419         for (i = 0; i < internals->slave_count; i++) {
2420                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2421
2422                 stats->ipackets += slave_stats.ipackets;
2423                 stats->opackets += slave_stats.opackets;
2424                 stats->ibytes += slave_stats.ibytes;
2425                 stats->obytes += slave_stats.obytes;
2426                 stats->imissed += slave_stats.imissed;
2427                 stats->ierrors += slave_stats.ierrors;
2428                 stats->oerrors += slave_stats.oerrors;
2429                 stats->rx_nombuf += slave_stats.rx_nombuf;
2430
2431                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2432                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2433                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2434                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2435                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2436                         stats->q_errors[j] += slave_stats.q_errors[j];
2437                 }
2438
2439         }
2440
2441         return 0;
2442 }
2443
2444 static void
2445 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2446 {
2447         struct bond_dev_private *internals = dev->data->dev_private;
2448         int i;
2449
2450         for (i = 0; i < internals->slave_count; i++)
2451                 rte_eth_stats_reset(internals->slaves[i].port_id);
2452 }
2453
2454 static void
2455 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2456 {
2457         struct bond_dev_private *internals = eth_dev->data->dev_private;
2458         int i;
2459
2460         internals->promiscuous_en = 1;
2461
2462         switch (internals->mode) {
2463         /* Promiscuous mode is propagated to all slaves */
2464         case BONDING_MODE_ROUND_ROBIN:
2465         case BONDING_MODE_BALANCE:
2466         case BONDING_MODE_BROADCAST:
2467                 for (i = 0; i < internals->slave_count; i++)
2468                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2469                 break;
2470         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2471         case BONDING_MODE_8023AD:
2472                 break;
2473         /* Promiscuous mode is propagated only to primary slave */
2474         case BONDING_MODE_ACTIVE_BACKUP:
2475         case BONDING_MODE_TLB:
2476         case BONDING_MODE_ALB:
2477         default:
2478                 /* Do not touch promisc when there cannot be primary ports */
2479                 if (internals->slave_count == 0)
2480                         break;
2481                 rte_eth_promiscuous_enable(internals->current_primary_port);
2482         }
2483 }
2484
2485 static void
2486 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2487 {
2488         struct bond_dev_private *internals = dev->data->dev_private;
2489         int i;
2490
2491         internals->promiscuous_en = 0;
2492
2493         switch (internals->mode) {
2494         /* Promiscuous mode is propagated to all slaves */
2495         case BONDING_MODE_ROUND_ROBIN:
2496         case BONDING_MODE_BALANCE:
2497         case BONDING_MODE_BROADCAST:
2498                 for (i = 0; i < internals->slave_count; i++)
2499                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2500                 break;
2501         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2502         case BONDING_MODE_8023AD:
2503                 break;
2504         /* Promiscuous mode is propagated only to primary slave */
2505         case BONDING_MODE_ACTIVE_BACKUP:
2506         case BONDING_MODE_TLB:
2507         case BONDING_MODE_ALB:
2508         default:
2509                 /* Do not touch promisc when there cannot be primary ports */
2510                 if (internals->slave_count == 0)
2511                         break;
2512                 rte_eth_promiscuous_disable(internals->current_primary_port);
2513         }
2514 }
2515
2516 static void
2517 bond_ethdev_delayed_lsc_propagation(void *arg)
2518 {
2519         if (arg == NULL)
2520                 return;
2521
2522         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2523                         RTE_ETH_EVENT_INTR_LSC, NULL);
2524 }
2525
2526 int
2527 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2528                 void *param, void *ret_param __rte_unused)
2529 {
2530         struct rte_eth_dev *bonded_eth_dev;
2531         struct bond_dev_private *internals;
2532         struct rte_eth_link link;
2533         int rc = -1;
2534
2535         uint8_t lsc_flag = 0;
2536         int valid_slave = 0;
2537         uint16_t active_pos;
2538         uint16_t i;
2539
2540         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2541                 return rc;
2542
2543         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2544
2545         if (check_for_bonded_ethdev(bonded_eth_dev))
2546                 return rc;
2547
2548         internals = bonded_eth_dev->data->dev_private;
2549
2550         /* If the device isn't started don't handle interrupts */
2551         if (!bonded_eth_dev->data->dev_started)
2552                 return rc;
2553
2554         /* verify that port_id is a valid slave of bonded port */
2555         for (i = 0; i < internals->slave_count; i++) {
2556                 if (internals->slaves[i].port_id == port_id) {
2557                         valid_slave = 1;
2558                         break;
2559                 }
2560         }
2561
2562         if (!valid_slave)
2563                 return rc;
2564
2565         /* Synchronize lsc callback parallel calls either by real link event
2566          * from the slaves PMDs or by the bonding PMD itself.
2567          */
2568         rte_spinlock_lock(&internals->lsc_lock);
2569
2570         /* Search for port in active port list */
2571         active_pos = find_slave_by_id(internals->active_slaves,
2572                         internals->active_slave_count, port_id);
2573
2574         rte_eth_link_get_nowait(port_id, &link);
2575         if (link.link_status) {
2576                 if (active_pos < internals->active_slave_count)
2577                         goto link_update;
2578
2579                 /* check link state properties if bonded link is up*/
2580                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2581                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2582                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2583                                              "for slave %d in bonding mode %d",
2584                                              port_id, internals->mode);
2585                 } else {
2586                         /* inherit slave link properties */
2587                         link_properties_set(bonded_eth_dev, &link);
2588                 }
2589
2590                 /* If no active slave ports then set this port to be
2591                  * the primary port.
2592                  */
2593                 if (internals->active_slave_count < 1) {
2594                         /* If first active slave, then change link status */
2595                         bonded_eth_dev->data->dev_link.link_status =
2596                                                                 ETH_LINK_UP;
2597                         internals->current_primary_port = port_id;
2598                         lsc_flag = 1;
2599
2600                         mac_address_slaves_update(bonded_eth_dev);
2601                 }
2602
2603                 activate_slave(bonded_eth_dev, port_id);
2604
2605                 /* If the user has defined the primary port then default to
2606                  * using it.
2607                  */
2608                 if (internals->user_defined_primary_port &&
2609                                 internals->primary_port == port_id)
2610                         bond_ethdev_primary_set(internals, port_id);
2611         } else {
2612                 if (active_pos == internals->active_slave_count)
2613                         goto link_update;
2614
2615                 /* Remove from active slave list */
2616                 deactivate_slave(bonded_eth_dev, port_id);
2617
2618                 if (internals->active_slave_count < 1)
2619                         lsc_flag = 1;
2620
2621                 /* Update primary id: take the first active slave from the list,
2622                  * or fall back to the configured primary port if none are active */
2623                 if (port_id == internals->current_primary_port) {
2624                         if (internals->active_slave_count > 0)
2625                                 bond_ethdev_primary_set(internals,
2626                                                 internals->active_slaves[0]);
2627                         else
2628                                 internals->current_primary_port = internals->primary_port;
2629                 }
2630         }
2631
2632 link_update:
2633         /**
2634          * Update bonded device link properties after any change to active
2635          * slaves
2636          */
2637         bond_ethdev_link_update(bonded_eth_dev, 0);
2638
2639         if (lsc_flag) {
2640                 /* Cancel any possible outstanding interrupts if delays are enabled */
2641                 if (internals->link_up_delay_ms > 0 ||
2642                         internals->link_down_delay_ms > 0)
2643                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2644                                         bonded_eth_dev);
2645
2646                 if (bonded_eth_dev->data->dev_link.link_status) {
2647                         if (internals->link_up_delay_ms > 0)
2648                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2649                                                 bond_ethdev_delayed_lsc_propagation,
2650                                                 (void *)bonded_eth_dev);
2651                         else
2652                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2653                                                 RTE_ETH_EVENT_INTR_LSC,
2654                                                 NULL);
2655
2656                 } else {
2657                         if (internals->link_down_delay_ms > 0)
2658                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2659                                                 bond_ethdev_delayed_lsc_propagation,
2660                                                 (void *)bonded_eth_dev);
2661                         else
2662                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2663                                                 RTE_ETH_EVENT_INTR_LSC,
2664                                                 NULL);
2665                 }
2666         }
2667
2668         rte_spinlock_unlock(&internals->lsc_lock);
2669
2670         return rc;
2671 }
2672
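/*
 * RSS RETA updates are accepted only at the bonded device's own table size,
 * cached locally, replicated across the full local table, and then pushed to
 * every slave at that slave's native RETA size.
 *
 * A minimal usage sketch against the bonded port (bond_port_id and
 * nb_rx_queues are hypothetical application variables; the table size must
 * match what the bonded device reports, here assumed to be 128):
 *
 *     struct rte_eth_rss_reta_entry64 conf[128 / RTE_RETA_GROUP_SIZE];
 *     unsigned int k;
 *
 *     memset(conf, 0, sizeof(conf));
 *     for (k = 0; k < 128; k++) {
 *             conf[k / RTE_RETA_GROUP_SIZE].mask |=
 *                             1ULL << (k % RTE_RETA_GROUP_SIZE);
 *             conf[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *                             k % nb_rx_queues;
 *     }
 *     rte_eth_dev_rss_reta_update(bond_port_id, conf, 128);
 */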
2673 static int
2674 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2675                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2676 {
2677         unsigned i, j;
2678         int result = 0;
2679         int slave_reta_size;
2680         unsigned reta_count;
2681         struct bond_dev_private *internals = dev->data->dev_private;
2682
2683         if (reta_size != internals->reta_size)
2684                 return -EINVAL;
2685
2686         /* Copy RETA table */
2687         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2688
2689         for (i = 0; i < reta_count; i++) {
2690                 internals->reta_conf[i].mask = reta_conf[i].mask;
2691                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2692                         if ((reta_conf[i].mask >> j) & 0x01)
2693                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2694         }
2695
2696         /* Fill rest of array */
2697         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2698                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2699                                 sizeof(internals->reta_conf[0]) * reta_count);
2700
2701         /* Propagate RETA over slaves */
2702         for (i = 0; i < internals->slave_count; i++) {
2703                 slave_reta_size = internals->slaves[i].reta_size;
2704                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2705                                 &internals->reta_conf[0], slave_reta_size);
2706                 if (result < 0)
2707                         return result;
2708         }
2709
2710         return 0;
2711 }
2712
2713 static int
2714 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2715                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2716 {
2717         int i, j;
2718         struct bond_dev_private *internals = dev->data->dev_private;
2719
2720         if (reta_size != internals->reta_size)
2721                 return -EINVAL;
2722
2723         /* Copy RETA table */
2724         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2725                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2726                         if ((reta_conf[i].mask >> j) & 0x01)
2727                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2728
2729         return 0;
2730 }
2731
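/*
 * The requested rss_hf bits are masked down to what every slave supports
 * before being cached and propagated. A hedged sketch of driving this
 * through the ethdev API (bond_port_id is a hypothetical application
 * variable):
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,        // keep the currently programmed key
 *             .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 */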
2732 static int
2733 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2734                 struct rte_eth_rss_conf *rss_conf)
2735 {
2736         int i, result = 0;
2737         struct bond_dev_private *internals = dev->data->dev_private;
2738         struct rte_eth_rss_conf bond_rss_conf;
2739
2740         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2741
2742         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2743
2744         if (bond_rss_conf.rss_hf != 0)
2745                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2746
2747         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2748                         sizeof(internals->rss_key)) {
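                /*
                 * A key length of 0 is treated as the 40-byte default (the
                 * size of the default RSS key installed at configure time);
                 * keys longer than the internal buffer never reach this
                 * branch and are silently ignored.
                 */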
2749                 if (bond_rss_conf.rss_key_len == 0)
2750                         bond_rss_conf.rss_key_len = 40;
2751                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2752                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2753                                 internals->rss_key_len);
2754         }
2755
2756         for (i = 0; i < internals->slave_count; i++) {
2757                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2758                                 &bond_rss_conf);
2759                 if (result < 0)
2760                         return result;
2761         }
2762
2763         return 0;
2764 }
2765
2766 static int
2767 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2768                 struct rte_eth_rss_conf *rss_conf)
2769 {
2770         struct bond_dev_private *internals = dev->data->dev_private;
2771
2772         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2773         rss_conf->rss_key_len = internals->rss_key_len;
2774         if (rss_conf->rss_key)
2775                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2776
2777         return 0;
2778 }
2779
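/*
 * MTU changes are applied in two passes: first verify that every slave
 * implements mtu_set, so a mid-sequence -ENOTSUP can never leave the slaves
 * with mixed MTUs, then program each slave in turn. From the application
 * side this is just rte_eth_dev_set_mtu() on the bonded port, e.g.
 * (hypothetical port id):
 *
 *     rte_eth_dev_set_mtu(bond_port_id, 9000);
 */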
2780 static int
2781 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2782 {
2783         struct rte_eth_dev *slave_eth_dev;
2784         struct bond_dev_private *internals = dev->data->dev_private;
2785         int ret, i;
2786
2787         rte_spinlock_lock(&internals->lock);
2788
2789         for (i = 0; i < internals->slave_count; i++) {
2790                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2791                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2792                         rte_spinlock_unlock(&internals->lock);
2793                         return -ENOTSUP;
2794                 }
2795         }
2796         for (i = 0; i < internals->slave_count; i++) {
2797                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2798                 if (ret < 0) {
2799                         rte_spinlock_unlock(&internals->lock);
2800                         return ret;
2801                 }
2802         }
2803
2804         rte_spinlock_unlock(&internals->lock);
2805         return 0;
2806 }
2807
2808 static int
2809 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2810                         struct rte_ether_addr *addr)
2811 {
2812         if (mac_address_set(dev, addr)) {
2813                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2814                 return -EINVAL;
2815         }
2816
2817         return 0;
2818 }
2819
2820 static int
2821 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2822                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2823 {
2824         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2825                 *(const void **)arg = &bond_flow_ops;
2826                 return 0;
2827         }
2828         return -ENOTSUP;
2829 }
2830
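/*
 * Secondary MAC addresses are accepted only if every slave supports both
 * mac_addr_add and mac_addr_remove, so a failure part-way through the
 * second loop can be rolled back from the slaves already programmed.
 */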
2831 static int
2832 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2833                         struct rte_ether_addr *mac_addr,
2834                         __rte_unused uint32_t index, uint32_t vmdq)
2835 {
2836         struct rte_eth_dev *slave_eth_dev;
2837         struct bond_dev_private *internals = dev->data->dev_private;
2838         int ret, i;
2839
2840         rte_spinlock_lock(&internals->lock);
2841
2842         for (i = 0; i < internals->slave_count; i++) {
2843                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2844                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2845                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2846                         ret = -ENOTSUP;
2847                         goto end;
2848                 }
2849         }
2850
2851         for (i = 0; i < internals->slave_count; i++) {
2852                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2853                                 mac_addr, vmdq);
2854                 if (ret < 0) {
2855                         /* rollback */
2856                         for (i--; i >= 0; i--)
2857                                 rte_eth_dev_mac_addr_remove(
2858                                         internals->slaves[i].port_id, mac_addr);
2859                         goto end;
2860                 }
2861         }
2862
2863         ret = 0;
2864 end:
2865         rte_spinlock_unlock(&internals->lock);
2866         return ret;
2867 }
2868
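/*
 * Removal is likewise all-or-nothing: if any slave lacks mac_addr_remove,
 * the address is left programmed on all of them.
 */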
2869 static void
2870 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2871 {
2872         struct rte_eth_dev *slave_eth_dev;
2873         struct bond_dev_private *internals = dev->data->dev_private;
2874         int i;
2875
2876         rte_spinlock_lock(&internals->lock);
2877
2878         for (i = 0; i < internals->slave_count; i++) {
2879                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2880                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2881                         goto end;
2882         }
2883
2884         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2885
2886         for (i = 0; i < internals->slave_count; i++)
2887                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2888                                 mac_addr);
2889
2890 end:
2891         rte_spinlock_unlock(&internals->lock);
2892 }
2893
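/* ethdev operations exported by the bonding PMD */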
2894 const struct eth_dev_ops default_dev_ops = {
2895         .dev_start            = bond_ethdev_start,
2896         .dev_stop             = bond_ethdev_stop,
2897         .dev_close            = bond_ethdev_close,
2898         .dev_configure        = bond_ethdev_configure,
2899         .dev_infos_get        = bond_ethdev_info,
2900         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2901         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2902         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2903         .rx_queue_release     = bond_ethdev_rx_queue_release,
2904         .tx_queue_release     = bond_ethdev_tx_queue_release,
2905         .link_update          = bond_ethdev_link_update,
2906         .stats_get            = bond_ethdev_stats_get,
2907         .stats_reset          = bond_ethdev_stats_reset,
2908         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2909         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2910         .reta_update          = bond_ethdev_rss_reta_update,
2911         .reta_query           = bond_ethdev_rss_reta_query,
2912         .rss_hash_update      = bond_ethdev_rss_hash_update,
2913         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2914         .mtu_set              = bond_ethdev_mtu_set,
2915         .mac_addr_set         = bond_ethdev_mac_address_set,
2916         .mac_addr_add         = bond_ethdev_mac_addr_add,
2917         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
2918         .filter_ctrl          = bond_filter_ctrl
2919 };
2920
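/*
 * Allocate and initialise a bonded ethdev: reserve the ethdev entry,
 * allocate the MAC address table and VLAN filter bitmap, apply the mode 4
 * defaults, and switch the device to the requested bonding mode.
 */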
2921 static int
2922 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2923 {
2924         const char *name = rte_vdev_device_name(dev);
2925         uint8_t socket_id = dev->device.numa_node;
2926         struct bond_dev_private *internals = NULL;
2927         struct rte_eth_dev *eth_dev = NULL;
2928         uint32_t vlan_filter_bmp_size;
2929
2930         /* now do all data allocation - for the eth_dev structure
2931          * and the internal (private) data
2932          */
2933
2934         /* reserve an ethdev entry */
2935         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2936         if (eth_dev == NULL) {
2937                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2938                 goto err;
2939         }
2940
2941         internals = eth_dev->data->dev_private;
2942         eth_dev->data->nb_rx_queues = (uint16_t)1;
2943         eth_dev->data->nb_tx_queues = (uint16_t)1;
2944
2945         /* Allocate memory for storing MAC addresses */
2946         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
2947                         BOND_MAX_MAC_ADDRS, 0, socket_id);
2948         if (eth_dev->data->mac_addrs == NULL) {
2949                 RTE_BOND_LOG(ERR,
2950                              "Failed to allocate %u bytes needed to store MAC addresses",
2951                              RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
2952                 goto err;
2953         }
2954
2955         eth_dev->dev_ops = &default_dev_ops;
2956         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2957
2958         rte_spinlock_init(&internals->lock);
2959         rte_spinlock_init(&internals->lsc_lock);
2960
2961         internals->port_id = eth_dev->data->port_id;
2962         internals->mode = BONDING_MODE_INVALID;
2963         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2964         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2965         internals->burst_xmit_hash = burst_xmit_l2_hash;
2966         internals->user_defined_mac = 0;
2967
2968         internals->link_status_polling_enabled = 0;
2969
2970         internals->link_status_polling_interval_ms =
2971                 DEFAULT_POLLING_INTERVAL_10_MS;
2972         internals->link_down_delay_ms = 0;
2973         internals->link_up_delay_ms = 0;
2974
2975         internals->slave_count = 0;
2976         internals->active_slave_count = 0;
2977         internals->rx_offload_capa = 0;
2978         internals->tx_offload_capa = 0;
2979         internals->rx_queue_offload_capa = 0;
2980         internals->tx_queue_offload_capa = 0;
2981         internals->candidate_max_rx_pktlen = 0;
2982         internals->max_rx_pktlen = 0;
2983
2984         /* Initially allow to choose any offload type */
2985         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2986
2987         memset(&internals->default_rxconf, 0,
2988                sizeof(internals->default_rxconf));
2989         memset(&internals->default_txconf, 0,
2990                sizeof(internals->default_txconf));
2991
2992         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
2993         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
2994
2995         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2996         memset(internals->slaves, 0, sizeof(internals->slaves));
2997
2998         TAILQ_INIT(&internals->flow_list);
2999         internals->flow_isolated_valid = 0;
3000
3001         /* Set mode 4 default configuration */
3002         bond_mode_8023ad_setup(eth_dev, NULL);
3003         if (bond_ethdev_mode_set(eth_dev, mode)) {
3004                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3005                                  eth_dev->data->port_id, mode);
3006                 goto err;
3007         }
3008
3009         vlan_filter_bmp_size =
3010                 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3011         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3012                                                    RTE_CACHE_LINE_SIZE);
3013         if (internals->vlan_filter_bmpmem == NULL) {
3014                 RTE_BOND_LOG(ERR,
3015                              "Failed to allocate vlan bitmap for bonded device %u",
3016                              eth_dev->data->port_id);
3017                 goto err;
3018         }
3019
3020         internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3021                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3022         if (internals->vlan_filter_bmp == NULL) {
3023                 RTE_BOND_LOG(ERR,
3024                              "Failed to init vlan bitmap for bonded device %u",
3025                              eth_dev->data->port_id);
3026                 rte_free(internals->vlan_filter_bmpmem);
3027                 goto err;
3028         }
3029
3030         return eth_dev->data->port_id;
3031
3032 err:
3033         rte_free(internals);
3034         if (eth_dev != NULL)
3035                 eth_dev->data->dev_private = NULL;
3036         rte_eth_dev_release_port(eth_dev);
3037         return -1;
3038 }
3039
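/*
 * vdev bus probe entry point. In a secondary process only the existing
 * ethdev entry is attached; device creation and kvarg parsing happen in
 * the primary process.
 */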
3040 static int
3041 bond_probe(struct rte_vdev_device *dev)
3042 {
3043         const char *name;
3044         struct bond_dev_private *internals;
3045         struct rte_kvargs *kvlist;
3046         uint8_t bonding_mode, socket_id;
3047         int  arg_count, port_id;
3048         uint8_t agg_mode;
3049         struct rte_eth_dev *eth_dev;
3050
3051         if (!dev)
3052                 return -EINVAL;
3053
3054         name = rte_vdev_device_name(dev);
3055         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3056
3057         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3058                 eth_dev = rte_eth_dev_attach_secondary(name);
3059                 if (!eth_dev) {
3060                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3061                         return -1;
3062                 }
3063                 /* TODO: request info from primary to set up Rx and Tx */
3064                 eth_dev->dev_ops = &default_dev_ops;
3065                 eth_dev->device = &dev->device;
3066                 rte_eth_dev_probing_finish(eth_dev);
3067                 return 0;
3068         }
3069
3070         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3071                 pmd_bond_init_valid_arguments);
3072         if (kvlist == NULL)
3073                 return -1;
3074
3075         /* Parse link bonding mode */
3076         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3077                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3078                                 &bond_ethdev_parse_slave_mode_kvarg,
3079                                 &bonding_mode) != 0) {
3080                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3081                                         name);
3082                         goto parse_error;
3083                 }
3084         } else {
3085                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3086                                 "device %s", name);
3087                 goto parse_error;
3088         }
3089
3090         /* Parse socket id to create bonding device on */
3091         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3092         if (arg_count == 1) {
3093                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3094                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3095                                 != 0) {
3096                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3097                                         "bonded device %s", name);
3098                         goto parse_error;
3099                 }
3100         } else if (arg_count > 1) {
3101                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3102                                 "bonded device %s", name);
3103                 goto parse_error;
3104         } else {
3105                 socket_id = rte_socket_id();
3106         }
3107
3108         dev->device.numa_node = socket_id;
3109
3110         /* Create link bonding eth device */
3111         port_id = bond_alloc(dev, bonding_mode);
3112         if (port_id < 0) {
3113                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u "
3114                                 "on socket %u.", name, bonding_mode, socket_id);
3115                 goto parse_error;
3116         }
3117         internals = rte_eth_devices[port_id].data->dev_private;
3118         internals->kvlist = kvlist;
3119
3120         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3121                 if (rte_kvargs_process(kvlist,
3122                                 PMD_BOND_AGG_MODE_KVARG,
3123                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3124                                 &agg_mode) != 0) {
3125                         RTE_BOND_LOG(ERR,
3126                                         "Failed to parse agg selection mode for bonded device %s",
3127                                         name);
3128                         goto parse_error;
3129                 }
3130
3131                 if (internals->mode == BONDING_MODE_8023AD)
3132                         internals->mode4.agg_selection = agg_mode;
3133         } else {
3134                 internals->mode4.agg_selection = AGG_STABLE;
3135         }
3136
3137         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3138         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u "
3139                         "on socket %u.", name, port_id, bonding_mode, socket_id);
3140         return 0;
3141
3142 parse_error:
3143         rte_kvargs_free(kvlist);
3144
3145         return -1;
3146 }
3147
3148 static int
3149 bond_remove(struct rte_vdev_device *dev)
3150 {
3151         struct rte_eth_dev *eth_dev;
3152         struct bond_dev_private *internals;
3153         const char *name;
3154
3155         if (!dev)
3156                 return -EINVAL;
3157
3158         name = rte_vdev_device_name(dev);
3159         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3160
3161         /* now free all data allocations - the eth_dev structure
3162          * and the internal (private) data
3163          */
3164
3165         /* find an ethdev entry */
3166         eth_dev = rte_eth_dev_allocated(name);
3167         if (eth_dev == NULL)
3168                 return -ENODEV;
3169
3170         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3171                 return rte_eth_dev_release_port(eth_dev);
3172
3173         RTE_ASSERT(eth_dev->device == &dev->device);
3174
3175         internals = eth_dev->data->dev_private;
3176         if (internals->slave_count != 0)
3177                 return -EBUSY;
3178
3179         if (eth_dev->data->dev_started == 1) {
3180                 bond_ethdev_stop(eth_dev);
3181                 bond_ethdev_close(eth_dev);
3182         }
3183
3184         eth_dev->dev_ops = NULL;
3185         eth_dev->rx_pkt_burst = NULL;
3186         eth_dev->tx_pkt_burst = NULL;
3187
3188         internals = eth_dev->data->dev_private;
3189         /* Try to release the mempool used in mode 6. If the bonded
3190          * device is not in mode 6, freeing a NULL pointer is harmless.
3191          */
3192         rte_mempool_free(internals->mode6.mempool);
3193         rte_bitmap_free(internals->vlan_filter_bmp);
3194         rte_free(internals->vlan_filter_bmpmem);
3195
3196         rte_eth_dev_release_port(eth_dev);
3197
3198         return 0;
3199 }
3200
3201 /* This function resolves the slave port ids after all the other pdevs and
3202  * vdevs have been allocated */
3203 static int
3204 bond_ethdev_configure(struct rte_eth_dev *dev)
3205 {
3206         const char *name = dev->device->name;
3207         struct bond_dev_private *internals = dev->data->dev_private;
3208         struct rte_kvargs *kvlist = internals->kvlist;
3209         int arg_count;
3210         uint16_t port_id = dev - rte_eth_devices;
3211         uint8_t agg_mode;
3212
3213         static const uint8_t default_rss_key[40] = {
3214                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3215                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3216                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3217                 0xBE, 0xAC, 0x01, 0xFA
3218         };
3219
3220         unsigned i, j;
3221
3222         /*
3223          * If RSS is enabled, fill table with default values and
3224          * set the key to the value specified in the port RSS configuration.
3225          * Fall back to default RSS key if the key is not specified
3226          */
3227         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3228                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3229                         internals->rss_key_len =
3230                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3231                         memcpy(internals->rss_key,
3232                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3233                                internals->rss_key_len);
3234                 } else {
3235                         internals->rss_key_len = sizeof(default_rss_key);
3236                         memcpy(internals->rss_key, default_rss_key,
3237                                internals->rss_key_len);
3238                 }
3239
3240                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3241                         internals->reta_conf[i].mask = ~0LL;
3242                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3243                                 internals->reta_conf[i].reta[j] =
3244                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3245                                                 dev->data->nb_rx_queues;
3246                 }
3247         }
3248
3249         /* set the max_rx_pktlen */
3250         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3251
3252         /*
3253          * if no kvlist, it means that this bonded device has been created
3254          * through the bonding api.
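         *
         * A hedged sketch of that API path (slave_port_id is a hypothetical
         * application variable):
         *
         *     int bond_port = rte_eth_bond_create("net_bonding0",
         *                     BONDING_MODE_BALANCE, rte_socket_id());
         *     rte_eth_bond_slave_add(bond_port, slave_port_id);
         *     rte_eth_bond_primary_set(bond_port, slave_port_id);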
3255          */
3256         if (!kvlist)
3257                 return 0;
3258
3259         /* Parse MAC address for bonded device */
3260         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3261         if (arg_count == 1) {
3262                 struct rte_ether_addr bond_mac;
3263
3264                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3265                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3266                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3267                                      name);
3268                         return -1;
3269                 }
3270
3271                 /* Set MAC address */
3272                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3273                         RTE_BOND_LOG(ERR,
3274                                      "Failed to set mac address on bonded device %s",
3275                                      name);
3276                         return -1;
3277                 }
3278         } else if (arg_count > 1) {
3279                 RTE_BOND_LOG(ERR,
3280                              "MAC address can be specified only once for bonded device %s",
3281                              name);
3282                 return -1;
3283         }
3284
3285         /* Parse/set balance mode transmit policy */
3286         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3287         if (arg_count == 1) {
3288                 uint8_t xmit_policy;
3289
3290                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3291                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3292                     0) {
3293                         RTE_BOND_LOG(INFO,
3294                                      "Invalid xmit policy specified for bonded device %s",
3295                                      name);
3296                         return -1;
3297                 }
3298
3299                 /* Set balance mode transmit policy */
3300                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3301                         RTE_BOND_LOG(ERR,
3302                                      "Failed to set balance xmit policy on bonded device %s",
3303                                      name);
3304                         return -1;
3305                 }
3306         } else if (arg_count > 1) {
3307                 RTE_BOND_LOG(ERR,
3308                              "Transmit policy can be specified only once for bonded device %s",
3309                              name);
3310                 return -1;
3311         }
3312
3313         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3314                 if (rte_kvargs_process(kvlist,
3315                                        PMD_BOND_AGG_MODE_KVARG,
3316                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3317                                        &agg_mode) != 0) {
3318                         RTE_BOND_LOG(ERR,
3319                                      "Failed to parse agg selection mode for bonded device %s",
3320                                      name);
3321                 }
3322                 if (internals->mode == BONDING_MODE_8023AD) {
3323                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3324                                         agg_mode);
3325                         if (ret < 0) {
3326                                 RTE_BOND_LOG(ERR,
3327                                         "Invalid args for agg selection set for bonded device %s",
3328                                         name);
3329                                 return -1;
3330                         }
3331                 }
3332         }
3333
3334         /* Parse/add slave ports to bonded device */
3335         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3336                 struct bond_ethdev_slave_ports slave_ports;
3337                 unsigned i;
3338
3339                 memset(&slave_ports, 0, sizeof(slave_ports));
3340
3341                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3342                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3343                         RTE_BOND_LOG(ERR,
3344                                      "Failed to parse slave ports for bonded device %s",
3345                                      name);
3346                         return -1;
3347                 }
3348
3349                 for (i = 0; i < slave_ports.slave_count; i++) {
3350                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3351                                 RTE_BOND_LOG(ERR,
3352                                              "Failed to add port %d as slave to bonded device %s",
3353                                              slave_ports.slaves[i], name);
3354                         }
3355                 }
3356
3357         } else {
3358                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3359                 return -1;
3360         }
3361
3362         /* Parse/set primary slave port id*/
3363         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3364         if (arg_count == 1) {
3365                 uint16_t primary_slave_port_id;
3366
3367                 if (rte_kvargs_process(kvlist,
3368                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3369                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3370                                        &primary_slave_port_id) < 0) {
3371                         RTE_BOND_LOG(INFO,
3372                                      "Invalid primary slave port id specified for bonded device %s",
3373                                      name);
3374                         return -1;
3375                 }
3376
3377                 /* Set the primary slave port id */
3378                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3379                     != 0) {
3380                         RTE_BOND_LOG(ERR,
3381                                      "Failed to set primary slave port %d on bonded device %s",
3382                                      primary_slave_port_id, name);
3383                         return -1;
3384                 }
3385         } else if (arg_count > 1) {
3386                 RTE_BOND_LOG(INFO,
3387                              "Primary slave can be specified only once for bonded device %s",
3388                              name);
3389                 return -1;
3390         }
3391
3392         /* Parse link status monitor polling interval */
3393         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3394         if (arg_count == 1) {
3395                 uint32_t lsc_poll_interval_ms;
3396
3397                 if (rte_kvargs_process(kvlist,
3398                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3399                                        &bond_ethdev_parse_time_ms_kvarg,
3400                                        &lsc_poll_interval_ms) < 0) {
3401                         RTE_BOND_LOG(INFO,
3402                                      "Invalid lsc polling interval value specified for bonded"
3403                                      " device %s", name);
3404                         return -1;
3405                 }
3406
3407                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3408                     != 0) {
3409                         RTE_BOND_LOG(ERR,
3410                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3411                                      lsc_poll_interval_ms, name);
3412                         return -1;
3413                 }
3414         } else if (arg_count > 1) {
3415                 RTE_BOND_LOG(INFO,
3416                              "LSC polling interval can be specified only once for bonded"
3417                              " device %s", name);
3418                 return -1;
3419         }
3420
3421         /* Parse link up interrupt propagation delay */
3422         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3423         if (arg_count == 1) {
3424                 uint32_t link_up_delay_ms;
3425
3426                 if (rte_kvargs_process(kvlist,
3427                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3428                                        &bond_ethdev_parse_time_ms_kvarg,
3429                                        &link_up_delay_ms) < 0) {
3430                         RTE_BOND_LOG(INFO,
3431                                      "Invalid link up propagation delay value specified for"
3432                                      " bonded device %s", name);
3433                         return -1;
3434                 }
3435
3436                 /* Set the link up propagation delay */
3437                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3438                     != 0) {
3439                         RTE_BOND_LOG(ERR,
3440                                      "Failed to set link up propagation delay (%u ms) on bonded"
3441                                      " device %s", link_up_delay_ms, name);
3442                         return -1;
3443                 }
3444         } else if (arg_count > 1) {
3445                 RTE_BOND_LOG(INFO,
3446                              "Link up propagation delay can be specified only once for"
3447                              " bonded device %s", name);
3448                 return -1;
3449         }
3450
3451         /* Parse link down interrupt propagation delay */
3452         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3453         if (arg_count == 1) {
3454                 uint32_t link_down_delay_ms;
3455
3456                 if (rte_kvargs_process(kvlist,
3457                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3458                                        &bond_ethdev_parse_time_ms_kvarg,
3459                                        &link_down_delay_ms) < 0) {
3460                         RTE_BOND_LOG(INFO,
3461                                      "Invalid link down propagation delay value specified for"
3462                                      " bonded device %s", name);
3463                         return -1;
3464                 }
3465
3466                 /* Set the link down propagation delay */
3467                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3468                     != 0) {
3469                         RTE_BOND_LOG(ERR,
3470                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3471                                      link_down_delay_ms, name);
3472                         return -1;
3473                 }
3474         } else if (arg_count > 1) {
3475                 RTE_BOND_LOG(INFO,
3476                              "Link down propagation delay can be specified only once for bonded device %s",
3477                              name);
3478                 return -1;
3479         }
3480
3481         return 0;
3482 }
3483
3484 struct rte_vdev_driver pmd_bond_drv = {
3485         .probe = bond_probe,
3486         .remove = bond_remove,
3487 };
3488
3489 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3490 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3491
3492 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3493         "slave=<ifc> "
3494         "primary=<ifc> "
3495         "mode=[0-6] "
3496         "xmit_policy=[l2 | l23 | l34] "
3497         "agg_mode=[count | stable | bandwidth] "
3498         "socket_id=<int> "
3499         "mac=<mac addr> "
3500         "lsc_poll_period_ms=<int> "
3501         "up_delay=<int> "
3502         "down_delay=<int>");
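
/*
 * A typical invocation using these kvargs (PCI addresses hypothetical),
 * e.g. from testpmd:
 *
 *     testpmd -l 0-3 --vdev \
 *         'net_bonding0,mode=2,slave=0000:00:08.0,slave=0000:00:09.0,primary=0000:00:08.0'
 */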
3503
3504 int bond_logtype;
3505
3506 RTE_INIT(bond_init_log)
3507 {
3508         bond_logtype = rte_log_register("pmd.net.bond");
3509         if (bond_logtype >= 0)
3510                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3511 }