net/bonding: fix LACP fast queue Rx handler
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <stdbool.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

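/*
 * Skip over any VLAN tag(s) at the start of the L2 payload. On return,
 * *proto holds the inner ethertype, and the returned offset is the number
 * of VLAN header bytes to skip: 0 for untagged frames, 4 for single-tagged
 * frames and 8 for QinQ frames (an outer tag followed by one inner tag).
 */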
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

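/*
 * Round-robin (mode 0) Rx burst: poll each active slave in turn, starting
 * with the slave after the one polled first on the previous call, until
 * either nb_pkts mbufs have been filled or every slave has been polled once.
 */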
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

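/*
 * A frame is counted as a slow protocol packet when it carries no VLAN tag,
 * its ethertype is RTE_ETHER_TYPE_SLOW (0x8809) and its subtype is either
 * LACP or marker, i.e. traffic destined for the mode 4 state machine rather
 * than for the application.
 */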
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};
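
/*
 * Together these describe an ingress rule that matches solely on the slow
 * protocols ethertype (the mask covers the full ethertype field and nothing
 * else), so LACP traffic can be steered to a dedicated Rx queue with a
 * QUEUE action, e.g.:
 *
 *        struct rte_flow_action_queue queue = { .index = rx_qid };
 *        struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *                { .type = RTE_FLOW_ACTION_TYPE_END, },
 *        };
 *        flow = rte_flow_create(slave_port, &flow_attr_8023ad,
 *                        flow_item_8023ad, actions, &error);
 *
 * which is what bond_ethdev_8023ad_flow_set() below does.
 */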
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

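/*
 * Common mode 4 (802.3ad) Rx path. With dedicated_rxq == false, slow
 * protocol frames arrive interleaved with data traffic on this queue and
 * must be picked out here and handed to the mode 4 state machine. With a
 * dedicated Rx queue configured, flow rules already steer slow frames away
 * from the data queues, so no LACP filtering is done here.
 */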
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
                bool dedicated_rxq)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during Rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If the packet type is known and not pure L2, it
                         * cannot be a slow protocol frame, so skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove packet from array if:
                         * - it is a slow packet but no dedicated rxq is present,
                         * - the slave is not in collecting state,
                         * - the bonding interface is not in promiscuous mode,
                         *   the packet is not multicast and the address does
                         *   not match.
                         */
                        if (unlikely(
                                (!dedicated_rxq &&
                                 is_lacp_packets(hdr->ether_type, subtype,
                                                 bufs[j])) ||
                                !collecting ||
                                (!promisc &&
                                 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
                                 !rte_is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Just update the RX/TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the
         * stats. Drop the update if the table is already full. */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an Rx burst, so the queue is a bond Rx queue */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If the tx burst fails, move the unsent packets to
                         * the end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

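/*
 * Transmit hash policies for balance and 802.3ad modes: layer 2 hashes the
 * source and destination MACs, layer 2+3 additionally folds in the IPv4 or
 * IPv6 addresses, and layer 3+4 combines the IP addresses with the TCP/UDP
 * ports. Each policy finally mixes the high bits down and reduces the hash
 * modulo slave_count to choose an output slave per packet.
 */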
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct rte_udp_hdr *udp_hdr;
        struct rte_tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct rte_tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct rte_udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

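/*
 * Estimate a slave's spare capacity over the current polling window.
 * link_bwg starts as the link rate in bytes per second and is scaled by the
 * window length in milliseconds, while the observed load is scaled by 1000
 * to match units; the quotient/remainder pair lets bandwidth_cmp() order
 * slaves from most to least spare bandwidth without floating point.
 */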
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to
         * send through tlb. In the worst case every packet will be sent on
         * one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move the unsent tail of slave_bufs[i] to the end of
                         * bufs so the caller can see which packets failed */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move the unsent tail back to bufs, as above */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

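/*
 * Distribute nb_bufs packets over the supplied slaves using the configured
 * transmit hash and burst each per-slave array. Packets a slave fails to
 * accept are moved to the tail of bufs, so, as with rte_eth_tx_burst(),
 * the caller can retry everything beyond the returned count.
 */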
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                 uint16_t *slave_port_ids, uint16_t slave_count)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave with the hash chosen by the
         * xmit policy.
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting
         */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);
        return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
                                slave_count);
}

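/*
 * Common mode 4 (802.3ad) Tx path. Unless a dedicated Tx queue is in use,
 * any LACPDUs queued by the control plane on a slave's tx_ring are sent
 * first (and re-enqueued if transmission fails, so they are not lost).
 * Data packets are then balanced only across slaves whose actor state is
 * DISTRIBUTING.
 */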
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                bool dedicated_txq)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        uint16_t slave_tx_count;

        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        if (dedicated_txq)
                goto skip_tx_ring;

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))
                        continue;

                if (rte_ring_dequeue(port->tx_ring,
                                     (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

skip_tx_ring:
        if (unlikely(nb_bufs == 0))
                return 0;

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
                                dist_slave_count);
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}

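/*
 * Broadcast (mode 3) Tx: every packet is transmitted on every active slave,
 * so each mbuf's reference count is first raised by num_of_slaves - 1. On
 * partial failure only the copies not accounted for by the most successful
 * slave are freed here, since the caller knows nothing about the extra
 * references.
 */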
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint8_t tx_failed_flag = 0;
        uint16_t num_of_slaves;

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

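/*
 * In mode 4, IEEE 802.3ad requires all links in an aggregation to share the
 * same speed and duplex. The first slave's link properties are therefore
 * recorded in mode4.slave_link and later slaves are checked against them by
 * link_properties_valid(); other modes simply report autonegotiated full
 * duplex on the bonded port.
 */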
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                /**
                 * If in mode 4 then save the link properties of the first
                 * slave; all subsequent slaves must match these properties.
                 */
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                bond_link->link_autoneg = slave_link->link_autoneg;
                bond_link->link_duplex = slave_link->link_duplex;
                bond_link->link_speed = slave_link->link_speed;
        } else {
                /**
                 * In any other mode the link properties are set to default
                 * values of AUTONEG/DUPLEX.
                 */
                ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
                ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
        }
}

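/*
 * Check whether a slave's link properties match those recorded for the
 * bond. Only meaningful in mode 4, where mismatched speed/duplex/autoneg
 * would break LACP aggregation; returns 0 on a match (or in any other
 * mode) and -1 otherwise.
 */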
static int
link_properties_valid(struct rte_eth_dev *ethdev,
                struct rte_eth_link *slave_link)
{
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                if (bond_link->link_duplex != slave_link->link_duplex ||
                        bond_link->link_autoneg != slave_link->link_autoneg ||
                        bond_link->link_speed != slave_link->link_speed)
                        return -1;
        }

        return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev,
                struct rte_ether_addr *dst_mac_addr)
{
        struct rte_ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        rte_ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev,
                struct rte_ether_addr *new_mac_addr)
{
        struct rte_ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC differs from the current MAC then update it */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

static const struct rte_ether_addr null_mac_addr;

/*
 * Add additional MAC addresses to the slave
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
                uint16_t slave_port_id)
{
        int i, ret;
        struct rte_ether_addr *mac_addr;

        for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
                mac_addr = &bonded_eth_dev->data->mac_addrs[i];
                if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
                        break;

                ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
                if (ret < 0) {
                        /* rollback */
                        for (i--; i > 0; i--)
                                rte_eth_dev_mac_addr_remove(slave_port_id,
                                        &bonded_eth_dev->data->mac_addrs[i]);
                        return ret;
                }
        }

        return 0;
}

/*
 * Remove additional MAC addresses from the slave
 */
int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
                uint16_t slave_port_id)
{
        int i, rc, ret;
        struct rte_ether_addr *mac_addr;

        rc = 0;
        for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
                mac_addr = &bonded_eth_dev->data->mac_addrs[i];
                if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
                        break;

                ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
                /* save only the first error */
                if (ret < 0 && rc == 0)
                        rc = ret;
        }

        return rc;
}

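/*
 * Push the bonded device's MAC address down to the slaves. In the modes
 * where every slave transmits (round robin, balance, broadcast) all
 * slaves take the bond's MAC; in mode 4 the update is delegated to the
 * 802.3ad state machine; in the primary-based modes only the current
 * primary gets the bond's MAC while the other slaves keep their
 * persisted addresses.
 */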
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices' MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (rte_eth_dev_default_mac_addr_set(
                                        internals->slaves[i].port_id,
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (rte_eth_dev_default_mac_addr_set(
                                                internals->current_primary_port,
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (rte_eth_dev_default_mac_addr_set(
                                                internals->slaves[i].port_id,
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

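/*
 * Install the rx/tx burst handlers that implement the requested bonding
 * mode on the bonded ethdev. Modes with additional state (802.3ad, ALB)
 * are enabled first, so a failure there leaves the previous handlers
 * intact.
 */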
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                if (internals->mode4.dedicated_queues.enabled == 0) {
                        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                        RTE_BOND_LOG(WARNING,
                                "Using mode 4, it is necessary to do TX burst "
                                "and RX burst at least every 100ms.");
                } else {
                        /* Use flow director's optimization */
                        eth_dev->rx_pkt_burst =
                                        bond_ethdev_rx_burst_8023ad_fast_queue;
                        eth_dev->tx_pkt_burst =
                                        bond_ethdev_tx_burst_8023ad_fast_queue;
                }
                break;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)
                        return -1;

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}

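/*
 * Prepare the 802.3ad "slow" (LACPDU) path for a slave. A per-slave mbuf
 * pool is created on first use, and when dedicated queues are enabled an
 * extra rx/tx queue pair is set up so control traffic bypasses the data
 * queues.
 */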
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
{
        int errval = 0;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];

        if (port->slow_pool == NULL) {
                char mem_name[256];
                int slave_id = slave_eth_dev->data->port_id;

                snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
                                slave_id);
                port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
                        250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
                        slave_eth_dev->data->numa_node);

                /* Any memory allocation failure in initialization is critical
                 * because resources can't be freed, so reinitialization is
                 * impossible. */
                if (port->slow_pool == NULL) {
                        rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
                                slave_id, mem_name, rte_strerror(rte_errno));
                }
        }

        if (internals->mode4.dedicated_queues.enabled == 1) {
                /* Configure slow Rx queue */

                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.rx_qid, 128,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                NULL, port->slow_pool);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id,
                                        internals->mode4.dedicated_queues.rx_qid,
                                        errval);
                        return errval;
                }

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.tx_qid, 512,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                NULL);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.tx_qid,
                                errval);
                        return errval;
                }
        }
        return 0;
}

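/*
 * (Re)configure a slave to mirror the bonded device: stop it, propagate
 * RSS/VLAN-filter/MTU settings, configure one rx/tx queue pair per bonded
 * queue (plus the dedicated 802.3ad pair when enabled), restart it and
 * resynchronize the RETA table and initial link status.
 */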
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
{
        struct bond_rx_queue *bd_rx_q;
        struct bond_tx_queue *bd_tx_q;
        uint16_t nb_rx_queues;
        uint16_t nb_tx_queues;

        int errval;
        uint16_t q_id;
        struct rte_flow_error flow_error;

        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        /* Stop slave */
        rte_eth_dev_stop(slave_eth_dev->data->port_id);

        /* Enable interrupts on slave device if supported */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

        /* If RSS is enabled for bonding, try to enable it for slaves */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
                if (internals->rss_key_len != 0) {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
                                        internals->rss_key_len;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
                                        internals->rss_key;
                } else {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                }

                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
                slave_eth_dev->data->dev_conf.rxmode.mq_mode =
                                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
        }

        if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
                        DEV_RX_OFFLOAD_VLAN_FILTER)
                slave_eth_dev->data->dev_conf.rxmode.offloads |=
                                DEV_RX_OFFLOAD_VLAN_FILTER;
        else
                slave_eth_dev->data->dev_conf.rxmode.offloads &=
                                ~DEV_RX_OFFLOAD_VLAN_FILTER;

        nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
        nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

        if (internals->mode == BONDING_MODE_8023AD) {
                if (internals->mode4.dedicated_queues.enabled == 1) {
                        nb_rx_queues++;
                        nb_tx_queues++;
                }
        }

        errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
                                     bonded_eth_dev->data->mtu);
        if (errval != 0 && errval != -ENOTSUP) {
                RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return errval;
        }

        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        nb_rx_queues, nb_tx_queues,
                        &(slave_eth_dev->data->dev_conf));
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return errval;
        }

        /* Setup Rx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_rx_q->nb_rx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Setup Tx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_tx_q->nb_tx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &bd_tx_q->tx_conf);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        if (internals->mode == BONDING_MODE_8023AD &&
                        internals->mode4.dedicated_queues.enabled == 1) {
                /* Capture the return value so a failure here is not masked by
                 * the stale errval from the loops above. */
                errval = slave_configure_slow_queue(bonded_eth_dev,
                                slave_eth_dev);
                if (errval != 0)
                        return errval;

                errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
                                slave_eth_dev->data->port_id);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                        return errval;
                }

                if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
                        rte_flow_destroy(slave_eth_dev->data->port_id,
                                        internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
                                        &flow_error);

                errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
                                slave_eth_dev->data->port_id);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                        return errval;
                }
        }

        /* Start device */
        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return -1;
        }

        /* If RSS is enabled for bonding, synchronize RETA */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
                int i;
                struct bond_dev_private *internals;

                internals = bonded_eth_dev->data->dev_private;

                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
                                errval = rte_eth_dev_rss_reta_update(
                                                slave_eth_dev->data->port_id,
                                                &internals->reta_conf[0],
                                                internals->slaves[i].reta_size);
                                if (errval != 0) {
                                        RTE_BOND_LOG(WARNING,
                                                     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
                                                     " RSS Configuration for bonding may be inconsistent.",
                                                     slave_eth_dev->data->port_id, errval);
                                }
                                break;
                        }
                }
        }

        /* If lsc interrupt is set, check initial slave's link status */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
                slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
                        RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
                        NULL);
        }

        return 0;
}

void
slave_remove(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev)
{
        uint16_t i;

        for (i = 0; i < internals->slave_count; i++)
                if (internals->slaves[i].port_id ==
                                slave_eth_dev->data->port_id)
                        break;

        if (i < (internals->slave_count - 1)) {
                struct rte_flow *flow;

                memmove(&internals->slaves[i], &internals->slaves[i + 1],
                                sizeof(internals->slaves[0]) *
                                (internals->slave_count - i - 1));
                TAILQ_FOREACH(flow, &internals->flow_list, next) {
                        memmove(&flow->flows[i], &flow->flows[i + 1],
                                sizeof(flow->flows[0]) *
                                (internals->slave_count - i - 1));
                        flow->flows[internals->slave_count - 1] = NULL;
                }
        }

        internals->slave_count--;

        /* force reconfiguration of slave interfaces */
        _rte_eth_dev_reset(slave_eth_dev);
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

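/*
 * Record a new slave in the bonded device's slave table: remember its
 * port id, whether its link must be polled (no LSC interrupt support),
 * and its current MAC address so it can be restored on removal.
 */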
void
slave_add(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev)
{
        struct bond_slave_details *slave_details =
                        &internals->slaves[internals->slave_count];

        slave_details->port_id = slave_eth_dev->data->port_id;
        slave_details->last_link_status = 0;

        /* Mark slave devices that don't support interrupts so we can
         * compensate when we start the bond
         */
        if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
                slave_details->link_status_poll_enabled = 1;
        }

        slave_details->link_status_wait_to_complete = 0;
        /* Save the slave's current MAC address so it can be restored when
         * the slave is removed from the bonding device */
        memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
                        sizeof(struct rte_ether_addr));
}

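/*
 * Set the current primary port. If the bond has no active slaves the
 * proposed port is accepted unconditionally; otherwise it only becomes
 * primary if it is found in the active slave list.
 */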
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
                uint16_t slave_port_id)
{
        int i;

        if (internals->active_slave_count < 1)
                internals->current_primary_port = slave_port_id;
        else
                /* Search bonded device slave ports for new proposed primary port */
                for (i = 0; i < internals->active_slave_count; i++) {
                        if (internals->active_slaves[i] == slave_port_id)
                                internals->current_primary_port = slave_port_id;
                }
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

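/*
 * Start the bonded device: derive the bond MAC from the primary slave
 * unless the user supplied one, reserve the dedicated 802.3ad queue ids,
 * reconfigure and start every slave, then kick off link polling and the
 * mode-specific (802.3ad/TLB/ALB) machinery.
 */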
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
        struct bond_dev_private *internals;
        int i;

        /* slave eth dev will be started by bonded device */
        if (check_for_bonded_ethdev(eth_dev)) {
                RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
                                eth_dev->data->port_id);
                return -1;
        }

        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
        eth_dev->data->dev_started = 1;

        internals = eth_dev->data->dev_private;

        if (internals->slave_count == 0) {
                RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
                goto out_err;
        }

        if (internals->user_defined_mac == 0) {
                struct rte_ether_addr *new_mac_addr = NULL;

                for (i = 0; i < internals->slave_count; i++)
                        if (internals->slaves[i].port_id == internals->primary_port)
                                new_mac_addr = &internals->slaves[i].persisted_mac_addr;

                if (new_mac_addr == NULL)
                        goto out_err;

                if (mac_address_set(eth_dev, new_mac_addr) != 0) {
                        RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
                                        eth_dev->data->port_id);
                        goto out_err;
                }
        }

        /* If bonded device is configured in promiscuous mode then re-apply config */
        if (internals->promiscuous_en)
                bond_ethdev_promiscuous_enable(eth_dev);

        if (internals->mode == BONDING_MODE_8023AD) {
                if (internals->mode4.dedicated_queues.enabled == 1) {
                        internals->mode4.dedicated_queues.rx_qid =
                                        eth_dev->data->nb_rx_queues;
                        internals->mode4.dedicated_queues.tx_qid =
                                        eth_dev->data->nb_tx_queues;
                }
        }

        /* Reconfigure each slave device if starting bonded device */
        for (i = 0; i < internals->slave_count; i++) {
                struct rte_eth_dev *slave_ethdev =
                                &(rte_eth_devices[internals->slaves[i].port_id]);
                if (slave_configure(eth_dev, slave_ethdev) != 0) {
                        RTE_BOND_LOG(ERR,
                                "bonded port (%d) failed to reconfigure slave device (%d)",
                                eth_dev->data->port_id,
                                internals->slaves[i].port_id);
                        goto out_err;
                }
                /* We will need to poll for link status if any slave doesn't
                 * support interrupts
                 */
                if (internals->slaves[i].link_status_poll_enabled)
                        internals->link_status_polling_enabled = 1;
        }

        /* start polling if needed */
        if (internals->link_status_polling_enabled) {
                rte_eal_alarm_set(
                        internals->link_status_polling_interval_ms * 1000,
                        bond_ethdev_slave_link_status_change_monitor,
                        (void *)&rte_eth_devices[internals->port_id]);
        }

        /* Update all slave devices' MACs */
        if (mac_address_slaves_update(eth_dev) != 0)
                goto out_err;

        if (internals->user_defined_primary_port)
                bond_ethdev_primary_set(internals, internals->primary_port);

        if (internals->mode == BONDING_MODE_8023AD)
                bond_mode_8023ad_start(eth_dev);

        if (internals->mode == BONDING_MODE_TLB ||
                        internals->mode == BONDING_MODE_ALB)
                bond_tlb_enable(internals);

        return 0;

out_err:
        eth_dev->data->dev_started = 0;
        return -1;
}

static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
        uint16_t i;

        if (dev->data->rx_queues != NULL) {
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        rte_free(dev->data->rx_queues[i]);
                        dev->data->rx_queues[i] = NULL;
                }
                dev->data->nb_rx_queues = 0;
        }

        if (dev->data->tx_queues != NULL) {
                for (i = 0; i < dev->data->nb_tx_queues; i++) {
                        rte_free(dev->data->tx_queues[i]);
                        dev->data->tx_queues[i] = NULL;
                }
                dev->data->nb_tx_queues = 0;
        }
}

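/*
 * Stop the bonded device: halt the mode-specific machinery (draining any
 * queued 802.3ad control messages), mark the link down, disable polling,
 * and stop and deactivate every currently active slave.
 */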
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
        struct bond_dev_private *internals = eth_dev->data->dev_private;
        uint16_t i;

        if (internals->mode == BONDING_MODE_8023AD) {
                struct port *port;
                void *pkt = NULL;

                bond_mode_8023ad_stop(eth_dev);

                /* Discard all messages to/from mode 4 state machines */
                for (i = 0; i < internals->active_slave_count; i++) {
                        port = &bond_mode_8023ad_ports[internals->active_slaves[i]];

                        RTE_ASSERT(port->rx_ring != NULL);
                        while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
                                rte_pktmbuf_free(pkt);

                        RTE_ASSERT(port->tx_ring != NULL);
                        while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
                                rte_pktmbuf_free(pkt);
                }
        }

        if (internals->mode == BONDING_MODE_TLB ||
                        internals->mode == BONDING_MODE_ALB) {
                bond_tlb_disable(internals);
                for (i = 0; i < internals->active_slave_count; i++)
                        tlb_last_obytets[internals->active_slaves[i]] = 0;
        }

        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
        eth_dev->data->dev_started = 0;

        internals->link_status_polling_enabled = 0;
        for (i = 0; i < internals->slave_count; i++) {
                uint16_t slave_id = internals->slaves[i].port_id;
                if (find_slave_by_id(internals->active_slaves,
                                internals->active_slave_count, slave_id) !=
                                                internals->active_slave_count) {
                        internals->slaves[i].last_link_status = 0;
                        rte_eth_dev_stop(slave_id);
                        deactivate_slave(eth_dev, slave_id);
                }
        }
}

void
bond_ethdev_close(struct rte_eth_dev *dev)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        uint16_t bond_port_id = internals->port_id;
        int skipped = 0;
        struct rte_flow_error ferror;

        RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
        while (internals->slave_count != skipped) {
                uint16_t port_id = internals->slaves[skipped].port_id;

                rte_eth_dev_stop(port_id);

                if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
                        RTE_BOND_LOG(ERR,
                                     "Failed to remove port %d from bonded device %s",
                                     port_id, dev->device->name);
                        skipped++;
                }
        }
        bond_flow_ops.flush(dev, &ferror);
        bond_ethdev_free_queues(dev);
        rte_bitmap_reset(internals->vlan_filter_bmp);
}

/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);

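/*
 * Report the bonded device's capabilities as the intersection of its
 * slaves' capabilities: queue counts and descriptor limits are the
 * minima across slaves, reduced by one queue pair when dedicated
 * 802.3ad queues are enabled.
 */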
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
        struct bond_dev_private *internals = dev->data->dev_private;

        uint16_t max_nb_rx_queues = UINT16_MAX;
        uint16_t max_nb_tx_queues = UINT16_MAX;
        uint16_t max_rx_desc_lim = UINT16_MAX;
        uint16_t max_tx_desc_lim = UINT16_MAX;

        dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;

        dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
                        internals->candidate_max_rx_pktlen :
                        RTE_ETHER_MAX_JUMBO_FRAME_LEN;

        /* The max number of tx/rx queues that the bonded device can support is
         * the minimum of the values reported by the slaves, as all slaves must
         * be capable of supporting the same number of tx/rx queues.
         */
        if (internals->slave_count > 0) {
                struct rte_eth_dev_info slave_info;
                uint16_t idx;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        rte_eth_dev_info_get(internals->slaves[idx].port_id,
                                        &slave_info);

                        if (slave_info.max_rx_queues < max_nb_rx_queues)
                                max_nb_rx_queues = slave_info.max_rx_queues;

                        if (slave_info.max_tx_queues < max_nb_tx_queues)
                                max_nb_tx_queues = slave_info.max_tx_queues;

                        if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
                                max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;

                        if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
                                max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
                }
        }

        dev_info->max_rx_queues = max_nb_rx_queues;
        dev_info->max_tx_queues = max_nb_tx_queues;

        memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
               sizeof(dev_info->default_rxconf));
        memcpy(&dev_info->default_txconf, &internals->default_txconf,
               sizeof(dev_info->default_txconf));

        dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
        dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;

        /**
         * If dedicated hw queues enabled for link bonding device in LACP mode
         * then we need to reduce the maximum number of data path queues by 1.
         */
        if (internals->mode == BONDING_MODE_8023AD &&
                internals->mode4.dedicated_queues.enabled == 1) {
                dev_info->max_rx_queues--;
                dev_info->max_tx_queues--;
        }

        dev_info->min_rx_bufsize = 0;

        dev_info->rx_offload_capa = internals->rx_offload_capa;
        dev_info->tx_offload_capa = internals->tx_offload_capa;
        dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
        dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
        dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

        dev_info->reta_size = internals->reta_size;
}

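/*
 * Apply a VLAN filter change to the bond: record it in the bond's VLAN
 * bitmap (so it can be replayed onto slaves added later) and propagate
 * it to every current slave, warning when a slave lacks support.
 */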
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
        int res;
        uint16_t i;
        struct bond_dev_private *internals = dev->data->dev_private;

        /* don't do this while a slave is being added */
        rte_spinlock_lock(&internals->lock);

        if (on)
                rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
        else
                rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

        for (i = 0; i < internals->slave_count; i++) {
                uint16_t port_id = internals->slaves[i].port_id;

                res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
                /* rte_eth_dev_vlan_filter() reports errors as negative errno */
                if (res == -ENOTSUP)
                        RTE_BOND_LOG(WARNING,
                                     "Setting VLAN filter on slave port %u not supported.",
                                     port_id);
        }

        rte_spinlock_unlock(&internals->lock);
        return 0;
}

static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
                uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
                const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
                        rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
                                        0, dev->data->numa_node);
        if (bd_rx_q == NULL)
                return -1;

        bd_rx_q->queue_id = rx_queue_id;
        bd_rx_q->dev_private = dev->data->dev_private;

        bd_rx_q->nb_rx_desc = nb_rx_desc;

        memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
        bd_rx_q->mb_pool = mb_pool;

        dev->data->rx_queues[rx_queue_id] = bd_rx_q;

        return 0;
}

static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
                uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
                const struct rte_eth_txconf *tx_conf)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
                        rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
                                        0, dev->data->numa_node);

        if (bd_tx_q == NULL)
                return -1;

        bd_tx_q->queue_id = tx_queue_id;
        bd_tx_q->dev_private = dev->data->dev_private;

        bd_tx_q->nb_tx_desc = nb_tx_desc;
        memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

        dev->data->tx_queues[tx_queue_id] = bd_tx_q;

        return 0;
}

static void
bond_ethdev_rx_queue_release(void *queue)
{
        if (queue == NULL)
                return;

        rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
        if (queue == NULL)
                return;

        rte_free(queue);
}

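/*
 * Alarm callback that periodically polls the link status of slaves whose
 * PMDs cannot raise LSC interrupts, feeding any change into the normal
 * LSC event path. It re-arms itself for as long as at least one slave
 * needs polling (or the bond's state could not be inspected this round).
 */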
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
        struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
        struct bond_dev_private *internals;

        /* Default value for polling slave found is true as we don't want to
         * disable the polling thread if we cannot get the lock */
        int i, polling_slave_found = 1;

        if (cb_arg == NULL)
                return;

        bonded_ethdev = cb_arg;
        internals = bonded_ethdev->data->dev_private;

        if (!bonded_ethdev->data->dev_started ||
                !internals->link_status_polling_enabled)
                return;

        /* If device is currently being configured then don't check slaves link
         * status, wait until next period */
        if (rte_spinlock_trylock(&internals->lock)) {
                if (internals->slave_count > 0)
                        polling_slave_found = 0;

                for (i = 0; i < internals->slave_count; i++) {
                        if (!internals->slaves[i].link_status_poll_enabled)
                                continue;

                        slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
                        polling_slave_found = 1;

                        /* Update slave link status */
                        (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
                                        internals->slaves[i].link_status_wait_to_complete);

                        /* if link status has changed since last checked then call lsc
                         * event callback */
                        if (slave_ethdev->data->dev_link.link_status !=
                                        internals->slaves[i].last_link_status) {
                                internals->slaves[i].last_link_status =
                                                slave_ethdev->data->dev_link.link_status;

                                bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
                                                RTE_ETH_EVENT_INTR_LSC,
                                                &bonded_ethdev->data->port_id,
                                                NULL);
                        }
                }
                rte_spinlock_unlock(&internals->lock);
        }

        if (polling_slave_found)
                /* Set alarm to continue monitoring link status of slave ethdev's */
                rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
                                bond_ethdev_slave_link_status_change_monitor, cb_arg);
}

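/*
 * Recompute the bonded device's link state from its active slaves. The
 * reported speed is mode dependent: for example, with two active 10G
 * slaves, broadcast mode reports 10000 Mbps (the minimum, since every
 * packet is sent on all slaves) while the aggregating modes report
 * 20000 Mbps (the sum).
 */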
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
        void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

        struct bond_dev_private *bond_ctx;
        struct rte_eth_link slave_link;

        uint32_t idx;

        bond_ctx = ethdev->data->dev_private;

        ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

        if (ethdev->data->dev_started == 0 ||
                        bond_ctx->active_slave_count == 0) {
                ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
                return 0;
        }

        ethdev->data->dev_link.link_status = ETH_LINK_UP;

        if (wait_to_complete)
                link_update = rte_eth_link_get;
        else
                link_update = rte_eth_link_get_nowait;

        switch (bond_ctx->mode) {
        case BONDING_MODE_BROADCAST:
                /**
                 * Setting link speed to UINT32_MAX to ensure we pick up the
                 * value of the first active slave
                 */
                ethdev->data->dev_link.link_speed = UINT32_MAX;

                /**
                 * link speed is the minimum of all the slaves' link speeds, as
                 * packet loss will occur on a slave if transmission at rates
                 * greater than this is attempted
                 */
                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
                        link_update(bond_ctx->active_slaves[idx], &slave_link);

                        if (slave_link.link_speed <
                                        ethdev->data->dev_link.link_speed)
                                ethdev->data->dev_link.link_speed =
                                                slave_link.link_speed;
                }
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                /* Current primary slave */
                link_update(bond_ctx->current_primary_port, &slave_link);

                ethdev->data->dev_link.link_speed = slave_link.link_speed;
                break;
        case BONDING_MODE_8023AD:
                ethdev->data->dev_link.link_autoneg =
                                bond_ctx->mode4.slave_link.link_autoneg;
                ethdev->data->dev_link.link_duplex =
                                bond_ctx->mode4.slave_link.link_duplex;
                /* fall through to update link speed */
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                /**
                 * In these modes the maximum theoretical link speed is the sum
                 * of all the slaves' link speeds
                 */
                ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
                        link_update(bond_ctx->active_slaves[idx], &slave_link);

                        ethdev->data->dev_link.link_speed +=
                                        slave_link.link_speed;
                }
        }

        return 0;
}

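/*
 * Aggregate statistics over all slaves. Basic and per-queue counters are
 * summed slave by slave; queue counters are combined index-wise, so queue
 * j of the bond accumulates queue j of every slave.
 */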
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        struct rte_eth_stats slave_stats;
        int i, j;

        for (i = 0; i < internals->slave_count; i++) {
                rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

                stats->ipackets += slave_stats.ipackets;
                stats->opackets += slave_stats.opackets;
                stats->ibytes += slave_stats.ibytes;
                stats->obytes += slave_stats.obytes;
                stats->imissed += slave_stats.imissed;
                stats->ierrors += slave_stats.ierrors;
                stats->oerrors += slave_stats.oerrors;
                stats->rx_nombuf += slave_stats.rx_nombuf;

                for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
                        stats->q_ipackets[j] += slave_stats.q_ipackets[j];
                        stats->q_opackets[j] += slave_stats.q_opackets[j];
                        stats->q_ibytes[j] += slave_stats.q_ibytes[j];
                        stats->q_obytes[j] += slave_stats.q_obytes[j];
                        stats->q_errors[j] += slave_stats.q_errors[j];
                }
        }

        return 0;
}

static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        int i;

        for (i = 0; i < internals->slave_count; i++)
                rte_eth_stats_reset(internals->slaves[i].port_id);
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
        struct bond_dev_private *internals = eth_dev->data->dev_private;
        int i;

        internals->promiscuous_en = 1;

        switch (internals->mode) {
        /* Promiscuous mode is propagated to all slaves */
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++)
                        rte_eth_promiscuous_enable(internals->slaves[i].port_id);
                break;
        /* In mode4 promiscuous mode is managed when slave is added/removed */
        case BONDING_MODE_8023AD:
                break;
        /* Promiscuous mode is propagated only to primary slave */
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                /* Do not touch promisc when there cannot be primary ports */
                if (internals->slave_count == 0)
                        break;
                rte_eth_promiscuous_enable(internals->current_primary_port);
        }
}

static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        int i;

        internals->promiscuous_en = 0;

        switch (internals->mode) {
        /* Promiscuous mode is propagated to all slaves */
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++)
                        rte_eth_promiscuous_disable(internals->slaves[i].port_id);
                break;
        /* In mode4 promiscuous mode is managed when slave is added/removed */
        case BONDING_MODE_8023AD:
                break;
        /* Promiscuous mode is propagated only to primary slave */
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                /* Do not touch promisc when there cannot be primary ports */
                if (internals->slave_count == 0)
                        break;
                rte_eth_promiscuous_disable(internals->current_primary_port);
        }
}

static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
        if (arg == NULL)
                return;

        _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
                        RTE_ETH_EVENT_INTR_LSC, NULL);
}

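/*
 * LSC event handler for slave ports. Under the bond's lsc lock it moves
 * the slave in or out of the active list, inherits or validates link
 * properties, elects a new primary when needed, refreshes the bonded
 * link state, and propagates the event upward, optionally deferred by
 * the configured link up/down delays.
 */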
2533 int
2534 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2535                 void *param, void *ret_param __rte_unused)
2536 {
2537         struct rte_eth_dev *bonded_eth_dev;
2538         struct bond_dev_private *internals;
2539         struct rte_eth_link link;
2540         int rc = -1;
2541
2542         uint8_t lsc_flag = 0;
2543         int valid_slave = 0;
2544         uint16_t active_pos;
2545         uint16_t i;
2546
2547         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2548                 return rc;
2549
2550         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2551
2552         if (check_for_bonded_ethdev(bonded_eth_dev))
2553                 return rc;
2554
2555         internals = bonded_eth_dev->data->dev_private;
2556
2557         /* If the device isn't started don't handle interrupts */
2558         if (!bonded_eth_dev->data->dev_started)
2559                 return rc;
2560
2561         /* verify that port_id is a valid slave of bonded port */
2562         for (i = 0; i < internals->slave_count; i++) {
2563                 if (internals->slaves[i].port_id == port_id) {
2564                         valid_slave = 1;
2565                         break;
2566                 }
2567         }
2568
2569         if (!valid_slave)
2570                 return rc;
2571
2572         /* Synchronize lsc callback parallel calls either by real link event
2573          * from the slaves PMDs or by the bonding PMD itself.
2574          */
2575         rte_spinlock_lock(&internals->lsc_lock);
2576
2577         /* Search for port in active port list */
2578         active_pos = find_slave_by_id(internals->active_slaves,
2579                         internals->active_slave_count, port_id);
2580
2581         rte_eth_link_get_nowait(port_id, &link);
2582         if (link.link_status) {
2583                 if (active_pos < internals->active_slave_count)
2584                         goto link_update;
2585
2586                 /* check link state properties if bonded link is up*/
2587                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2588                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2589                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2590                                              "for slave %d in bonding mode %d",
2591                                              port_id, internals->mode);
2592                 } else {
2593                         /* inherit slave link properties */
2594                         link_properties_set(bonded_eth_dev, &link);
2595                 }
2596
2597                 /* If no active slave ports then set this port to be
2598                  * the primary port.
2599                  */
2600                 if (internals->active_slave_count < 1) {
2601                         /* If first active slave, then change link status */
2602                         bonded_eth_dev->data->dev_link.link_status =
2603                                                                 ETH_LINK_UP;
2604                         internals->current_primary_port = port_id;
2605                         lsc_flag = 1;
2606
2607                         mac_address_slaves_update(bonded_eth_dev);
2608                 }
2609
2610                 activate_slave(bonded_eth_dev, port_id);
2611
2612                 /* If the user has defined the primary port then default to
2613                  * using it.
2614                  */
2615                 if (internals->user_defined_primary_port &&
2616                                 internals->primary_port == port_id)
2617                         bond_ethdev_primary_set(internals, port_id);
2618         } else {
2619                 if (active_pos == internals->active_slave_count)
2620                         goto link_update;
2621
2622                 /* Remove from active slave list */
2623                 deactivate_slave(bonded_eth_dev, port_id);
2624
2625                 if (internals->active_slave_count < 1)
2626                         lsc_flag = 1;
2627
2628                 /* Update primary id, take first active slave from list or if none
2629                  * available set to -1 */
2630                 if (port_id == internals->current_primary_port) {
2631                         if (internals->active_slave_count > 0)
2632                                 bond_ethdev_primary_set(internals,
2633                                                 internals->active_slaves[0]);
2634                         else
2635                                 internals->current_primary_port = internals->primary_port;
2636                 }
2637         }
2638
2639 link_update:
2640         /**
2641          * Update bonded device link properties after any change to active
2642          * slaves
2643          */
2644         bond_ethdev_link_update(bonded_eth_dev, 0);
2645
2646         if (lsc_flag) {
2647                 /* Cancel any possible outstanding interrupts if delays are enabled */
2648                 if (internals->link_up_delay_ms > 0 ||
2649                         internals->link_down_delay_ms > 0)
2650                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2651                                         bonded_eth_dev);
2652
2653                 if (bonded_eth_dev->data->dev_link.link_status) {
2654                         if (internals->link_up_delay_ms > 0)
2655                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2656                                                 bond_ethdev_delayed_lsc_propagation,
2657                                                 (void *)bonded_eth_dev);
2658                         else
2659                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2660                                                 RTE_ETH_EVENT_INTR_LSC,
2661                                                 NULL);
2662
2663                 } else {
2664                         if (internals->link_down_delay_ms > 0)
2665                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2666                                                 bond_ethdev_delayed_lsc_propagation,
2667                                                 (void *)bonded_eth_dev);
2668                         else
2669                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2670                                                 RTE_ETH_EVENT_INTR_LSC,
2671                                                 NULL);
2672                 }
2673         }
2674
2675         rte_spinlock_unlock(&internals->lsc_lock);
2676
2677         return rc;
2678 }
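
/*
 * Note: the link up/down delays consumed by the handler above are
 * per-device settings. A minimal sketch of configuring them from an
 * application (bond_port_id and the millisecond values are hypothetical;
 * the setters are the public API from rte_eth_bond.h):
 *
 *   rte_eth_bond_link_up_prop_delay_set(bond_port_id, 500);
 *   rte_eth_bond_link_down_prop_delay_set(bond_port_id, 100);
 */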
2679
2680 static int
2681 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2682                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2683 {
2684         unsigned i, j;
2685         int result = 0;
2686         int slave_reta_size;
2687         unsigned reta_count;
2688         struct bond_dev_private *internals = dev->data->dev_private;
2689
2690         if (reta_size != internals->reta_size)
2691                 return -EINVAL;
2692
2693         /* Copy RETA table */
2694         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2695
2696         for (i = 0; i < reta_count; i++) {
2697                 internals->reta_conf[i].mask = reta_conf[i].mask;
2698                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2699                         if ((reta_conf[i].mask >> j) & 0x01)
2700                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2701         }
2702
2703         /* Fill the rest of the array by replicating the first block */
2704         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2705                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2706                                 sizeof(internals->reta_conf[0]) * reta_count);
2707
2708         /* Propagate RETA over slaves */
2709         for (i = 0; i < internals->slave_count; i++) {
2710                 slave_reta_size = internals->slaves[i].reta_size;
2711                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2712                                 &internals->reta_conf[0], slave_reta_size);
2713                 if (result < 0)
2714                         return result;
2715         }
2716
2717         return 0;
2718 }
2719
2720 static int
2721 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2722                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2723 {
2724         int i, j;
2725         struct bond_dev_private *internals = dev->data->dev_private;
2726
2727         if (reta_size != internals->reta_size)
2728                 return -EINVAL;
2729
2730         /* Copy RETA table */
2731         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2732                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2733                         if ((reta_conf[i].mask >> j) & 0x01)
2734                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2735
2736         return 0;
2737 }
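
/*
 * Usage sketch, not part of the driver (bond_port_id and nb_rx_queues are
 * hypothetical, and reta_size is assumed to be at most
 * 2 * RTE_RETA_GROUP_SIZE): an application can rebalance the bonded port's
 * RETA through the standard ethdev call, which lands in the handlers above
 * and is propagated to every slave:
 *
 *   struct rte_eth_rss_reta_entry64 reta_conf[2];
 *   struct rte_eth_dev_info dev_info;
 *   uint16_t i;
 *
 *   rte_eth_dev_info_get(bond_port_id, &dev_info);
 *   memset(reta_conf, 0, sizeof(reta_conf));
 *   for (i = 0; i < dev_info.reta_size; i++) {
 *           reta_conf[i / RTE_RETA_GROUP_SIZE].mask |=
 *                   1ULL << (i % RTE_RETA_GROUP_SIZE);
 *           reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
 *                   i % nb_rx_queues;
 *   }
 *   rte_eth_dev_rss_reta_update(bond_port_id, reta_conf, dev_info.reta_size);
 */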
2738
2739 static int
2740 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2741                 struct rte_eth_rss_conf *rss_conf)
2742 {
2743         int i, result = 0;
2744         struct bond_dev_private *internals = dev->data->dev_private;
2745         struct rte_eth_rss_conf bond_rss_conf;
2746
2747         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2748
2749         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2750
2751         if (bond_rss_conf.rss_hf != 0)
2752                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2753
2754         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2755                         sizeof(internals->rss_key)) {
2756                 if (bond_rss_conf.rss_key_len == 0)
2757                         bond_rss_conf.rss_key_len = 40; /* default key length */
2758                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2759                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2760                                 internals->rss_key_len);
2761         }
2762
2763         for (i = 0; i < internals->slave_count; i++) {
2764                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2765                                 &bond_rss_conf);
2766                 if (result < 0)
2767                         return result;
2768         }
2769
2770         return 0;
2771 }
2772
2773 static int
2774 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2775                 struct rte_eth_rss_conf *rss_conf)
2776 {
2777         struct bond_dev_private *internals = dev->data->dev_private;
2778
2779         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2780         rss_conf->rss_key_len = internals->rss_key_len;
2781         if (rss_conf->rss_key)
2782                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2783
2784         return 0;
2785 }
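
/*
 * Usage sketch (bond_port_id is hypothetical): restricting the bonded port
 * to IP/TCP hashing at runtime goes through the handler above; the
 * requested hash functions are masked against the slaves' common RSS
 * capabilities and then pushed to every slave. A NULL rss_key leaves the
 * current key in place:
 *
 *   struct rte_eth_rss_conf rss_conf = {
 *           .rss_key = NULL,
 *           .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *   };
 *   rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf);
 */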
2786
2787 static int
2788 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2789 {
2790         struct rte_eth_dev *slave_eth_dev;
2791         struct bond_dev_private *internals = dev->data->dev_private;
2792         int ret, i;
2793
2794         rte_spinlock_lock(&internals->lock);
2795
2796         for (i = 0; i < internals->slave_count; i++) {
2797                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2798                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2799                         rte_spinlock_unlock(&internals->lock);
2800                         return -ENOTSUP;
2801                 }
2802         }
2803         for (i = 0; i < internals->slave_count; i++) {
2804                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2805                 if (ret < 0) {
2806                         rte_spinlock_unlock(&internals->lock);
2807                         return ret;
2808                 }
2809         }
2810
2811         rte_spinlock_unlock(&internals->lock);
2812         return 0;
2813 }
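
/*
 * Usage sketch (bond_port_id is hypothetical): an MTU change on the bonded
 * port goes through the handler above, which first checks that every slave
 * implements mtu_set and only then applies the value slave by slave:
 *
 *   int ret = rte_eth_dev_set_mtu(bond_port_id, 9000);
 *   if (ret == -ENOTSUP)
 *           ... at least one slave cannot change its MTU ...
 */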
2814
2815 static int
2816 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2817                         struct rte_ether_addr *addr)
2818 {
2819         if (mac_address_set(dev, addr)) {
2820                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2821                 return -EINVAL;
2822         }
2823
2824         return 0;
2825 }
2826
2827 static int
2828 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2829                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2830 {
2831         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2832                 *(const void **)arg = &bond_flow_ops;
2833                 return 0;
2834         }
2835         return -ENOTSUP;
2836 }
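
/*
 * Note: bond_filter_ctrl() above is how the rte_flow library discovers
 * this driver's flow ops: a flow API call queries the port with
 * RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET and receives a pointer to
 * bond_flow_ops in return.
 */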
2837
2838 static int
2839 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2840                         struct rte_ether_addr *mac_addr,
2841                         __rte_unused uint32_t index, uint32_t vmdq)
2842 {
2843         struct rte_eth_dev *slave_eth_dev;
2844         struct bond_dev_private *internals = dev->data->dev_private;
2845         int ret, i;
2846
2847         rte_spinlock_lock(&internals->lock);
2848
2849         for (i = 0; i < internals->slave_count; i++) {
2850                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2851                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2852                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2853                         ret = -ENOTSUP;
2854                         goto end;
2855                 }
2856         }
2857
2858         for (i = 0; i < internals->slave_count; i++) {
2859                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2860                                 mac_addr, vmdq);
2861                 if (ret < 0) {
2862                         /* rollback */
2863                         for (i--; i >= 0; i--)
2864                                 rte_eth_dev_mac_addr_remove(
2865                                         internals->slaves[i].port_id, mac_addr);
2866                         goto end;
2867                 }
2868         }
2869
2870         ret = 0;
2871 end:
2872         rte_spinlock_unlock(&internals->lock);
2873         return ret;
2874 }
2875
2876 static void
2877 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2878 {
2879         struct rte_eth_dev *slave_eth_dev;
2880         struct bond_dev_private *internals = dev->data->dev_private;
2881         int i;
2882
2883         rte_spinlock_lock(&internals->lock);
2884
2885         for (i = 0; i < internals->slave_count; i++) {
2886                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2887                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2888                         goto end;
2889         }
2890
2891         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2892
2893         for (i = 0; i < internals->slave_count; i++)
2894                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2895                                 mac_addr);
2896
2897 end:
2898         rte_spinlock_unlock(&internals->lock);
2899 }
2900
2901 const struct eth_dev_ops default_dev_ops = {
2902         .dev_start            = bond_ethdev_start,
2903         .dev_stop             = bond_ethdev_stop,
2904         .dev_close            = bond_ethdev_close,
2905         .dev_configure        = bond_ethdev_configure,
2906         .dev_infos_get        = bond_ethdev_info,
2907         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2908         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2909         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2910         .rx_queue_release     = bond_ethdev_rx_queue_release,
2911         .tx_queue_release     = bond_ethdev_tx_queue_release,
2912         .link_update          = bond_ethdev_link_update,
2913         .stats_get            = bond_ethdev_stats_get,
2914         .stats_reset          = bond_ethdev_stats_reset,
2915         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2916         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2917         .reta_update          = bond_ethdev_rss_reta_update,
2918         .reta_query           = bond_ethdev_rss_reta_query,
2919         .rss_hash_update      = bond_ethdev_rss_hash_update,
2920         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2921         .mtu_set              = bond_ethdev_mtu_set,
2922         .mac_addr_set         = bond_ethdev_mac_address_set,
2923         .mac_addr_add         = bond_ethdev_mac_addr_add,
2924         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
2925         .filter_ctrl          = bond_filter_ctrl
2926 };
2927
2928 static int
2929 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2930 {
2931         const char *name = rte_vdev_device_name(dev);
2932         uint8_t socket_id = dev->device.numa_node;
2933         struct bond_dev_private *internals = NULL;
2934         struct rte_eth_dev *eth_dev = NULL;
2935         uint32_t vlan_filter_bmp_size;
2936
2937         /* now do all data allocation - for the eth_dev structure and the
2938          * internal (private) data
2939          */
2940
2941         /* reserve an ethdev entry */
2942         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2943         if (eth_dev == NULL) {
2944                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2945                 goto err;
2946         }
2947
2948         internals = eth_dev->data->dev_private;
2949         eth_dev->data->nb_rx_queues = (uint16_t)1;
2950         eth_dev->data->nb_tx_queues = (uint16_t)1;
2951
2952         /* Allocate memory for storing MAC addresses */
2953         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
2954                         BOND_MAX_MAC_ADDRS, 0, socket_id);
2955         if (eth_dev->data->mac_addrs == NULL) {
2956                 RTE_BOND_LOG(ERR,
2957                              "Failed to allocate %u bytes needed to store MAC addresses",
2958                              RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
2959                 goto err;
2960         }
2961
2962         eth_dev->dev_ops = &default_dev_ops;
2963         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2964
2965         rte_spinlock_init(&internals->lock);
2966         rte_spinlock_init(&internals->lsc_lock);
2967
2968         internals->port_id = eth_dev->data->port_id;
2969         internals->mode = BONDING_MODE_INVALID;
2970         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2971         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2972         internals->burst_xmit_hash = burst_xmit_l2_hash;
2973         internals->user_defined_mac = 0;
2974
2975         internals->link_status_polling_enabled = 0;
2976
2977         internals->link_status_polling_interval_ms =
2978                 DEFAULT_POLLING_INTERVAL_10_MS;
2979         internals->link_down_delay_ms = 0;
2980         internals->link_up_delay_ms = 0;
2981
2982         internals->slave_count = 0;
2983         internals->active_slave_count = 0;
2984         internals->rx_offload_capa = 0;
2985         internals->tx_offload_capa = 0;
2986         internals->rx_queue_offload_capa = 0;
2987         internals->tx_queue_offload_capa = 0;
2988         internals->candidate_max_rx_pktlen = 0;
2989         internals->max_rx_pktlen = 0;
2990
2991         /* Initially allow any RSS hash function to be chosen */
2992         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2993
2994         memset(&internals->default_rxconf, 0,
2995                sizeof(internals->default_rxconf));
2996         memset(&internals->default_txconf, 0,
2997                sizeof(internals->default_txconf));
2998
2999         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3000         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3001
3002         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3003         memset(internals->slaves, 0, sizeof(internals->slaves));
3004
3005         TAILQ_INIT(&internals->flow_list);
3006         internals->flow_isolated_valid = 0;
3007
3008         /* Set mode 4 default configuration */
3009         bond_mode_8023ad_setup(eth_dev, NULL);
3010         if (bond_ethdev_mode_set(eth_dev, mode)) {
3011                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3012                                  eth_dev->data->port_id, mode);
3013                 goto err;
3014         }
3015
3016         vlan_filter_bmp_size =
3017                 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3018         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3019                                                    RTE_CACHE_LINE_SIZE);
3020         if (internals->vlan_filter_bmpmem == NULL) {
3021                 RTE_BOND_LOG(ERR,
3022                              "Failed to allocate vlan bitmap for bonded device %u",
3023                              eth_dev->data->port_id);
3024                 goto err;
3025         }
3026
3027         internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3028                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3029         if (internals->vlan_filter_bmp == NULL) {
3030                 RTE_BOND_LOG(ERR,
3031                              "Failed to init vlan bitmap for bonded device %u",
3032                              eth_dev->data->port_id);
3033                 rte_free(internals->vlan_filter_bmpmem);
3034                 goto err;
3035         }
3036
3037         return eth_dev->data->port_id;
3038
3039 err:
3040         rte_free(internals);
3041         if (eth_dev != NULL)
3042                 eth_dev->data->dev_private = NULL;
3043         rte_eth_dev_release_port(eth_dev);
3044         return -1;
3045 }
3046
3047 static int
3048 bond_probe(struct rte_vdev_device *dev)
3049 {
3050         const char *name;
3051         struct bond_dev_private *internals;
3052         struct rte_kvargs *kvlist;
3053         uint8_t bonding_mode, socket_id;
3054         int arg_count, port_id;
3055         uint8_t agg_mode;
3056         struct rte_eth_dev *eth_dev;
3057
3058         if (!dev)
3059                 return -EINVAL;
3060
3061         name = rte_vdev_device_name(dev);
3062         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3063
3064         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3065                 eth_dev = rte_eth_dev_attach_secondary(name);
3066                 if (!eth_dev) {
3067                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3068                         return -1;
3069                 }
3070                 /* TODO: request info from primary to set up Rx and Tx */
3071                 eth_dev->dev_ops = &default_dev_ops;
3072                 eth_dev->device = &dev->device;
3073                 rte_eth_dev_probing_finish(eth_dev);
3074                 return 0;
3075         }
3076
3077         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3078                 pmd_bond_init_valid_arguments);
3079         if (kvlist == NULL)
3080                 return -1;
3081
3082         /* Parse link bonding mode */
3083         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3084                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3085                                 &bond_ethdev_parse_slave_mode_kvarg,
3086                                 &bonding_mode) != 0) {
3087                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3088                                         name);
3089                         goto parse_error;
3090                 }
3091         } else {
3092                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3093                                 "device %s", name);
3094                 goto parse_error;
3095         }
3096
3097         /* Parse socket id to create bonding device on */
3098         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3099         if (arg_count == 1) {
3100                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3101                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3102                                 != 0) {
3103                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3104                                         "bonded device %s", name);
3105                         goto parse_error;
3106                 }
3107         } else if (arg_count > 1) {
3108                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3109                                 "bonded device %s", name);
3110                 goto parse_error;
3111         } else {
3112                 socket_id = rte_socket_id();
3113         }
3114
3115         dev->device.numa_node = socket_id;
3116
3117         /* Create link bonding eth device */
3118         port_id = bond_alloc(dev, bonding_mode);
3119         if (port_id < 0) {
3120                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3121                                 "socket %u.", name, bonding_mode, socket_id);
3122                 goto parse_error;
3123         }
3124         internals = rte_eth_devices[port_id].data->dev_private;
3125         internals->kvlist = kvlist;
3126
3127         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3128                 if (rte_kvargs_process(kvlist,
3129                                 PMD_BOND_AGG_MODE_KVARG,
3130                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3131                                 &agg_mode) != 0) {
3132                         RTE_BOND_LOG(ERR,
3133                                         "Failed to parse agg selection mode for bonded device %s",
3134                                         name);
3135                         goto parse_error;
3136                 }
3137
3138                 if (internals->mode == BONDING_MODE_8023AD)
3139                         internals->mode4.agg_selection = agg_mode;
3140         } else {
3141                 internals->mode4.agg_selection = AGG_STABLE;
3142         }
3143
3144         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3145         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3146                         "socket %u.", name, port_id, bonding_mode, socket_id);
3147         return 0;
3148
3149 parse_error:
3150         rte_kvargs_free(kvlist);
3151
3152         return -1;
3153 }
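
/*
 * Sketch of the equivalent programmatic path (slave0_port_id and
 * slave1_port_id are hypothetical, already-probed ports): instead of the
 * --vdev string handled by bond_probe() above, an application can build
 * the same device at runtime with the public bonding API:
 *
 *   int bond_port = rte_eth_bond_create("net_bonding0",
 *                                       BONDING_MODE_8023AD, rte_socket_id());
 *   if (bond_port >= 0) {
 *           rte_eth_bond_slave_add(bond_port, slave0_port_id);
 *           rte_eth_bond_slave_add(bond_port, slave1_port_id);
 *   }
 */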
3154
3155 static int
3156 bond_remove(struct rte_vdev_device *dev)
3157 {
3158         struct rte_eth_dev *eth_dev;
3159         struct bond_dev_private *internals;
3160         const char *name;
3161
3162         if (!dev)
3163                 return -EINVAL;
3164
3165         name = rte_vdev_device_name(dev);
3166         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3167
3168         /* now free all data allocation - for the eth_dev structure and
3169          * the internal (private) data
3170          */
3171
3172         /* find an ethdev entry */
3173         eth_dev = rte_eth_dev_allocated(name);
3174         if (eth_dev == NULL)
3175                 return -ENODEV;
3176
3177         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3178                 return rte_eth_dev_release_port(eth_dev);
3179
3180         RTE_ASSERT(eth_dev->device == &dev->device);
3181
3182         internals = eth_dev->data->dev_private;
3183         if (internals->slave_count != 0)
3184                 return -EBUSY;
3185
3186         if (eth_dev->data->dev_started == 1) {
3187                 bond_ethdev_stop(eth_dev);
3188                 bond_ethdev_close(eth_dev);
3189         }
3190
3191         eth_dev->dev_ops = NULL;
3192         eth_dev->rx_pkt_burst = NULL;
3193         eth_dev->tx_pkt_burst = NULL;
3194
3195         /* Try to release the mempool used in mode 6. If the bonded
3196          * device is not in mode 6, the pointer is NULL, and freeing
3197          * NULL is not a problem.
3198          */
3199         rte_mempool_free(internals->mode6.mempool);
3200         rte_bitmap_free(internals->vlan_filter_bmp);
3201         rte_free(internals->vlan_filter_bmpmem);
3202
3203         rte_eth_dev_release_port(eth_dev);
3204
3205         return 0;
3206 }
3207
3208 /* this part resolves the slave port ids after all the other pdevs and
3209  * vdevs have been allocated */
3210 static int
3211 bond_ethdev_configure(struct rte_eth_dev *dev)
3212 {
3213         const char *name = dev->device->name;
3214         struct bond_dev_private *internals = dev->data->dev_private;
3215         struct rte_kvargs *kvlist = internals->kvlist;
3216         int arg_count;
3217         uint16_t port_id = dev - rte_eth_devices;
3218         uint8_t agg_mode;
3219
3220         static const uint8_t default_rss_key[40] = {
3221                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3222                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3223                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3224                 0xBE, 0xAC, 0x01, 0xFA
3225         };
3226
3227         unsigned i, j;
3228
3229         /*
3230          * If RSS is enabled, fill the table with default values and
3231          * set the key to the value specified in the port RSS configuration.
3232          * Fall back to the default RSS key if no key is specified.
3233          */
3234         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3235                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3236                         internals->rss_key_len =
3237                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3238                         memcpy(internals->rss_key,
3239                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3240                                internals->rss_key_len);
3241                 } else {
3242                         internals->rss_key_len = sizeof(default_rss_key);
3243                         memcpy(internals->rss_key, default_rss_key,
3244                                internals->rss_key_len);
3245                 }
3246
3247                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3248                         internals->reta_conf[i].mask = ~0LL;
3249                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3250                                 internals->reta_conf[i].reta[j] =
3251                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3252                                                 dev->data->nb_rx_queues;
3253                 }
3254         }
3255
3256         /* set the max_rx_pktlen */
3257         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3258
3259         /*
3260          * If there is no kvlist, this bonded device has been created
3261          * through the bonding API.
3262          */
3263         if (!kvlist)
3264                 return 0;
3265
3266         /* Parse MAC address for bonded device */
3267         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3268         if (arg_count == 1) {
3269                 struct rte_ether_addr bond_mac;
3270
3271                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3272                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3273                         RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
3274                                      name);
3275                         return -1;
3276                 }
3277
3278                 /* Set MAC address */
3279                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3280                         RTE_BOND_LOG(ERR,
3281                                      "Failed to set MAC address on bonded device %s",
3282                                      name);
3283                         return -1;
3284                 }
3285         } else if (arg_count > 1) {
3286                 RTE_BOND_LOG(ERR,
3287                              "MAC address can be specified only once for bonded device %s",
3288                              name);
3289                 return -1;
3290         }
3291
3292         /* Parse/set balance mode transmit policy */
3293         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3294         if (arg_count == 1) {
3295                 uint8_t xmit_policy;
3296
3297                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3298                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3299                     0) {
3300                         RTE_BOND_LOG(INFO,
3301                                      "Invalid xmit policy specified for bonded device %s",
3302                                      name);
3303                         return -1;
3304                 }
3305
3306                 /* Set balance mode transmit policy */
3307                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3308                         RTE_BOND_LOG(ERR,
3309                                      "Failed to set balance xmit policy on bonded device %s",
3310                                      name);
3311                         return -1;
3312                 }
3313         } else if (arg_count > 1) {
3314                 RTE_BOND_LOG(ERR,
3315                              "Transmit policy can be specified only once for bonded device %s",
3316                              name);
3317                 return -1;
3318         }
3319
3320         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3321                 if (rte_kvargs_process(kvlist,
3322                                        PMD_BOND_AGG_MODE_KVARG,
3323                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3324                                        &agg_mode) != 0) {
3325                         RTE_BOND_LOG(ERR,
3326                                      "Failed to parse agg selection mode for bonded device %s", name);
3327                         return -1;
3328                 }
3329                 if (internals->mode == BONDING_MODE_8023AD) {
3330                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3331                                         agg_mode);
3332                         if (ret < 0) {
3333                                 RTE_BOND_LOG(ERR,
3334                                         "Invalid args for agg selection set for bonded device %s",
3335                                         name);
3336                                 return -1;
3337                         }
3338                 }
3339         }
3340
3341         /* Parse/add slave ports to bonded device */
3342         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3343                 struct bond_ethdev_slave_ports slave_ports;
3344                 unsigned i;
3345
3346                 memset(&slave_ports, 0, sizeof(slave_ports));
3347
3348                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3349                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3350                         RTE_BOND_LOG(ERR,
3351                                      "Failed to parse slave ports for bonded device %s",
3352                                      name);
3353                         return -1;
3354                 }
3355
3356                 for (i = 0; i < slave_ports.slave_count; i++) {
3357                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3358                                 RTE_BOND_LOG(ERR,
3359                                              "Failed to add port %d as slave to bonded device %s",
3360                                              slave_ports.slaves[i], name);
3361                         }
3362                 }
3363
3364         } else {
3365                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3366                 return -1;
3367         }
3368
3369         /* Parse/set primary slave port id */
3370         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3371         if (arg_count == 1) {
3372                 uint16_t primary_slave_port_id;
3373
3374                 if (rte_kvargs_process(kvlist,
3375                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3376                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3377                                        &primary_slave_port_id) < 0) {
3378                         RTE_BOND_LOG(INFO,
3379                                      "Invalid primary slave port id specified for bonded device %s",
3380                                      name);
3381                         return -1;
3382                 }
3383
3384                 /* Set the primary slave port id */
3385                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3386                     != 0) {
3387                         RTE_BOND_LOG(ERR,
3388                                      "Failed to set primary slave port %d on bonded device %s",
3389                                      primary_slave_port_id, name);
3390                         return -1;
3391                 }
3392         } else if (arg_count > 1) {
3393                 RTE_BOND_LOG(INFO,
3394                              "Primary slave can be specified only once for bonded device %s",
3395                              name);
3396                 return -1;
3397         }
3398
3399         /* Parse link status monitor polling interval */
3400         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3401         if (arg_count == 1) {
3402                 uint32_t lsc_poll_interval_ms;
3403
3404                 if (rte_kvargs_process(kvlist,
3405                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3406                                        &bond_ethdev_parse_time_ms_kvarg,
3407                                        &lsc_poll_interval_ms) < 0) {
3408                         RTE_BOND_LOG(INFO,
3409                                      "Invalid lsc polling interval value specified for bonded"
3410                                      " device %s", name);
3411                         return -1;
3412                 }
3413
3414                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3415                     != 0) {
3416                         RTE_BOND_LOG(ERR,
3417                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3418                                      lsc_poll_interval_ms, name);
3419                         return -1;
3420                 }
3421         } else if (arg_count > 1) {
3422                 RTE_BOND_LOG(INFO,
3423                              "LSC polling interval can be specified only once for bonded"
3424                              " device %s", name);
3425                 return -1;
3426         }
3427
3428         /* Parse link up interrupt propagation delay */
3429         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3430         if (arg_count == 1) {
3431                 uint32_t link_up_delay_ms;
3432
3433                 if (rte_kvargs_process(kvlist,
3434                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3435                                        &bond_ethdev_parse_time_ms_kvarg,
3436                                        &link_up_delay_ms) < 0) {
3437                         RTE_BOND_LOG(INFO,
3438                                      "Invalid link up propagation delay value specified for"
3439                                      " bonded device %s", name);
3440                         return -1;
3441                 }
3442
3443                 /* Set link up propagation delay */
3444                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3445                     != 0) {
3446                         RTE_BOND_LOG(ERR,
3447                                      "Failed to set link up propagation delay (%u ms) on bonded"
3448                                      " device %s", link_up_delay_ms, name);
3449                         return -1;
3450                 }
3451         } else if (arg_count > 1) {
3452                 RTE_BOND_LOG(INFO,
3453                              "Link up propagation delay can be specified only once for"
3454                              " bonded device %s", name);
3455                 return -1;
3456         }
3457
3458         /* Parse link down interrupt propagation delay */
3459         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3460         if (arg_count == 1) {
3461                 uint32_t link_down_delay_ms;
3462
3463                 if (rte_kvargs_process(kvlist,
3464                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3465                                        &bond_ethdev_parse_time_ms_kvarg,
3466                                        &link_down_delay_ms) < 0) {
3467                         RTE_BOND_LOG(INFO,
3468                                      "Invalid link down propagation delay value specified for"
3469                                      " bonded device %s", name);
3470                         return -1;
3471                 }
3472
3473                 /* Set link down propagation delay */
3474                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3475                     != 0) {
3476                         RTE_BOND_LOG(ERR,
3477                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3478                                      link_down_delay_ms, name);
3479                         return -1;
3480                 }
3481         } else if (arg_count > 1) {
3482                 RTE_BOND_LOG(INFO,
3483                              "Link down propagation delay can be specified only once for bonded device %s",
3484                              name);
3485                 return -1;
3486         }
3487
3488         return 0;
3489 }
3490
3491 struct rte_vdev_driver pmd_bond_drv = {
3492         .probe = bond_probe,
3493         .remove = bond_remove,
3494 };
3495
3496 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3497 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3498
3499 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3500         "slave=<ifc> "
3501         "primary=<ifc> "
3502         "mode=[0-6] "
3503         "xmit_policy=[l2 | l23 | l34] "
3504         "agg_mode=[count | stable | bandwidth] "
3505         "socket_id=<int> "
3506         "mac=<mac addr> "
3507         "lsc_poll_period_ms=<int> "
3508         "up_delay=<int> "
3509         "down_delay=<int>");
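
/*
 * Example (PCI addresses are hypothetical): creating a mode 4 bonded
 * device from the EAL command line with two slaves, the stable aggregator
 * selection mode and a 100 ms LSC poll period:
 *
 *   --vdev 'net_bonding0,mode=4,slave=0000:02:00.0,slave=0000:02:00.1,agg_mode=stable,lsc_poll_period_ms=100'
 */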
3510
3511 int bond_logtype;
3512
3513 RTE_INIT(bond_init_log)
3514 {
3515         bond_logtype = rte_log_register("pmd.net.bond");
3516         if (bond_logtype >= 0)
3517                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3518 }