net/bonding: prefer allmulti to promiscuous for LACP
drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <stdbool.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

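/*
 * Return the number of bytes of VLAN header(s) following the Ethernet
 * header, skipping at most an outer (QinQ) and an inner tag, and update
 * *proto to the encapsulated ethertype.
 */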
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

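/*
 * Round-robin receive: poll each active slave in turn, starting from the
 * slave after the one used on the previous call, until nb_pkts mbufs have
 * been gathered or every active slave has been polled once.
 */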
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

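/*
 * Return true only for untagged slow-protocol frames, i.e. frames whose
 * ethertype is RTE_ETHER_TYPE_SLOW, whose subtype is LACP or marker, and
 * which carry no VLAN tag.
 */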
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

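/*
 * Validate that a slave port could install the rte_flow rule which steers
 * slow (LACP) frames to a dedicated queue, and that it has enough RX/TX
 * queues to set aside one of each for that purpose.
 */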
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director
         * and can provide the dedicated queues */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

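/*
 * Install the rte_flow rule on a slave port so that slow (LACP) frames are
 * steered to the mode 4 dedicated RX queue; the created handle is stored
 * per slave in mode4.dedicated_queues.flow[].
 */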
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

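/*
 * Mode 4 (802.3AD) receive path. Packets are read round-robin from the
 * active slaves; slow-protocol frames are handed to the mode 4 state
 * machine (unless a dedicated RX queue already filters them in hardware),
 * and frames the bonding device should not see, given its current
 * promiscuous/allmulti state and MAC address, are dropped.
 */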
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
                bool dedicated_rxq)
{
        /* Cast to structure containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
        const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove packet from array if:
                         * - it is a slow packet but no dedicated rxq is present,
                         * - the slave is not in collecting state,
                         * - the bonding interface is not in promiscuous mode and:
                         *   - the packet is unicast and the address does not match,
                         *   - the packet is multicast and the bonding interface
                         *     is not in allmulti mode.
                         */
                        if (unlikely(
                                (!dedicated_rxq &&
                                 is_lacp_packets(hdr->ether_type, subtype,
                                                 bufs[j])) ||
                                !collecting ||
                                (!promisc &&
                                 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
                                   !rte_is_same_ether_addr(bond_mac,
                                                       &hdr->d_addr)) ||
                                  (!allmulti &&
                                   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

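/*
 * Bump the per-client RX or TX counter for the given (IPv4 address, port)
 * pair, inserting a new entry in client_stats[] when the client is seen
 * for the first time. The TXorRXindicator pointer doubles as the direction
 * flag: it is compared against &burstnumberRX to tell RX from TX.
 */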
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

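/*
 * Hash helpers used by the balance/xmit-policy TX paths: ether_hash folds
 * the source and destination MAC addresses, while ipv4_hash and ipv6_hash
 * fold the L3 addresses.
 */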
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

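/*
 * burst_xmit_*_hash: map each mbuf in the burst to an output slave index
 * in slaves[], using an L2, L2+L3 or L3+L4 header hash according to the
 * configured transmit policy.
 */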
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct rte_udp_hdr *udp_hdr;
        struct rte_tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct rte_tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct rte_udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

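/*
 * Estimate how much transmit capacity a slave has left: from the link
 * speed, derive the bytes the link could have carried over the update
 * window, subtract the bytes actually sent (load), and store the quotient
 * and remainder in bwg_slave so the slaves can be sorted by headroom.
 */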
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

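/*
 * Mode 5 (TLB) transmit: slaves are tried in tlb_slaves_order[], which the
 * periodic callback above keeps sorted by remaining bandwidth. Frames that
 * carry the primary slave's source MAC are rewritten to the transmitting
 * slave's MAC so each slave sends with its own address.
 */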
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

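/*
 * Mode 6 (ALB) transmit: ARP frames are assigned to slaves by the ALB
 * logic (with their source MAC rewritten per slave), generated ARP update
 * packets are sent on their designated slaves, and all other traffic falls
 * back to the TLB transmit policy.
 */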
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

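/*
 * Distribute a burst across the given slaves using the device's configured
 * transmit hash, then send each slave's share; mbufs that could not be
 * transmitted are moved to the tail of bufs so the caller can retry them.
 */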
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                 uint16_t *slave_port_ids, uint16_t slave_count)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting
         */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);
        return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
                                slave_count);
}

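/*
 * Mode 4 (802.3AD) transmit: first drain any pending LACP control frames
 * from each slave's tx_ring (skipped when a dedicated TX queue handles
 * them), then balance the data burst across the slaves that are currently
 * in the DISTRIBUTING state.
 */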
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                bool dedicated_txq)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        uint16_t slave_tx_count;

        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        if (dedicated_txq)
                goto skip_tx_ring;

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))
                        continue;

                if (rte_ring_dequeue(port->tx_ring,
                                     (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

skip_tx_ring:
        if (unlikely(nb_bufs == 0))
                return 0;

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
                                dist_slave_count);
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}

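/*
 * Broadcast transmit: every mbuf's reference count is raised and the whole
 * burst is sent on every active slave. On partial failure only the most
 * successful slave's count is reported, and the remaining references are
 * freed here because the application does not know about them.
 */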
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint8_t tx_failed_flag = 0;
        uint16_t num_of_slaves;

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

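/*
 * Record the link properties (speed/duplex/autoneg) the bonded device
 * advertises. In mode 4 the first slave's properties become the reference
 * that every later slave must match; other modes fall back to autoneg and
 * full duplex defaults.
 */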
1333 static void
1334 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1335 {
1336         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1337
1338         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1339                 /**
1340                  * If in mode 4 then save the link properties of the first
1341                  * slave, all subsequent slaves must match these properties
1342                  */
1343                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1344
1345                 bond_link->link_autoneg = slave_link->link_autoneg;
1346                 bond_link->link_duplex = slave_link->link_duplex;
1347                 bond_link->link_speed = slave_link->link_speed;
1348         } else {
1349                 /**
1350                  * In any other mode the link properties are set to default
1351                  * values of AUTONEG/DUPLEX
1352                  */
1353                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1354                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1355         }
1356 }
1357
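/*
 * Validate a slave's link properties against the mode 4 reference captured
 * by link_properties_set().  Returns 0 when the properties match (or when
 * the bond is not in mode 4), -1 otherwise.
 */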
1358 static int
1359 link_properties_valid(struct rte_eth_dev *ethdev,
1360                 struct rte_eth_link *slave_link)
1361 {
1362         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1363
1364         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1365                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1366
1367                 if (bond_link->link_duplex != slave_link->link_duplex ||
1368                         bond_link->link_autoneg != slave_link->link_autoneg ||
1369                         bond_link->link_speed != slave_link->link_speed)
1370                         return -1;
1371         }
1372
1373         return 0;
1374 }
1375
1376 int
1377 mac_address_get(struct rte_eth_dev *eth_dev,
1378                 struct rte_ether_addr *dst_mac_addr)
1379 {
1380         struct rte_ether_addr *mac_addr;
1381
1382         if (eth_dev == NULL) {
1383                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1384                 return -1;
1385         }
1386
1387         if (dst_mac_addr == NULL) {
1388                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1389                 return -1;
1390         }
1391
1392         mac_addr = eth_dev->data->mac_addrs;
1393
1394         rte_ether_addr_copy(mac_addr, dst_mac_addr);
1395         return 0;
1396 }
1397
1398 int
1399 mac_address_set(struct rte_eth_dev *eth_dev,
1400                 struct rte_ether_addr *new_mac_addr)
1401 {
1402         struct rte_ether_addr *mac_addr;
1403
1404         if (eth_dev == NULL) {
1405                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1406                 return -1;
1407         }
1408
1409         if (new_mac_addr == NULL) {
1410                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1411                 return -1;
1412         }
1413
1414         mac_addr = eth_dev->data->mac_addrs;
1415
1416         /* If the new MAC is different from the current MAC then update */
1417         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1418                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1419
1420         return 0;
1421 }
1422
1423 static const struct rte_ether_addr null_mac_addr;
1424
1425 /*
1426  * Add the bonded device's additional MAC addresses (entries 1 onwards of
1427  * mac_addrs; a zeroed entry terminates the list) to the slave, rolling
1428  * back any addresses already added if one fails
1429  */
1428 int
1429 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1430                 uint16_t slave_port_id)
1431 {
1432         int i, ret;
1433         struct rte_ether_addr *mac_addr;
1434
1435         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1436                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1437                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1438                         break;
1439
1440                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1441                 if (ret < 0) {
1442                         /* rollback */
1443                         for (i--; i > 0; i--)
1444                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1445                                         &bonded_eth_dev->data->mac_addrs[i]);
1446                         return ret;
1447                 }
1448         }
1449
1450         return 0;
1451 }
1452
1453 /*
1454  * Remove additional MAC addresses from the slave
1455  */
1456 int
1457 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1458                 uint16_t slave_port_id)
1459 {
1460         int i, rc, ret;
1461         struct rte_ether_addr *mac_addr;
1462
1463         rc = 0;
1464         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1465                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1466                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1467                         break;
1468
1469                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1470                 /* save only the first error */
1471                 if (ret < 0 && rc == 0)
1472                         rc = ret;
1473         }
1474
1475         return rc;
1476 }
1477
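/*
 * Push MAC addresses down to the slaves according to the bonding mode:
 * modes that transmit on all slaves (round robin, balance, broadcast) give
 * every slave the bonded MAC, mode 4 delegates to the 802.3AD state
 * machine, and the remaining modes give the bonded MAC to the current
 * primary only, restoring each other slave's persisted MAC.
 */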
1478 int
1479 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1480 {
1481         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1482         int i;
1483
1484         /* Update slave devices' MAC addresses */
1485         if (internals->slave_count < 1)
1486                 return -1;
1487
1488         switch (internals->mode) {
1489         case BONDING_MODE_ROUND_ROBIN:
1490         case BONDING_MODE_BALANCE:
1491         case BONDING_MODE_BROADCAST:
1492                 for (i = 0; i < internals->slave_count; i++) {
1493                         if (rte_eth_dev_default_mac_addr_set(
1494                                         internals->slaves[i].port_id,
1495                                         bonded_eth_dev->data->mac_addrs)) {
1496                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1497                                                 internals->slaves[i].port_id);
1498                                 return -1;
1499                         }
1500                 }
1501                 break;
1502         case BONDING_MODE_8023AD:
1503                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1504                 break;
1505         case BONDING_MODE_ACTIVE_BACKUP:
1506         case BONDING_MODE_TLB:
1507         case BONDING_MODE_ALB:
1508         default:
1509                 for (i = 0; i < internals->slave_count; i++) {
1510                         if (internals->slaves[i].port_id ==
1511                                         internals->current_primary_port) {
1512                                 if (rte_eth_dev_default_mac_addr_set(
1513                                                 internals->current_primary_port,
1514                                                 bonded_eth_dev->data->mac_addrs)) {
1515                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1516                                                         internals->current_primary_port);
1517                                         return -1;
1518                                 }
1519                         } else {
1520                                 if (rte_eth_dev_default_mac_addr_set(
1521                                                 internals->slaves[i].port_id,
1522                                                 &internals->slaves[i].persisted_mac_addr)) {
1523                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1524                                                         internals->slaves[i].port_id);
1525                                         return -1;
1526                                 }
1527                         }
1528                 }
1529         }
1530
1531         return 0;
1532 }
1533
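/*
 * Bind the rx/tx burst handlers that implement the requested mode.  As an
 * illustrative sketch (not code from this file), an application normally
 * reaches this helper through the public bonding API, e.g.:
 *
 *	port = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD, 0);
 *	rte_eth_bond_mode_set(port, BONDING_MODE_BALANCE);
 */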
1534 int
1535 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1536 {
1537         struct bond_dev_private *internals;
1538
1539         internals = eth_dev->data->dev_private;
1540
1541         switch (mode) {
1542         case BONDING_MODE_ROUND_ROBIN:
1543                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1544                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1545                 break;
1546         case BONDING_MODE_ACTIVE_BACKUP:
1547                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1548                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1549                 break;
1550         case BONDING_MODE_BALANCE:
1551                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1552                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1553                 break;
1554         case BONDING_MODE_BROADCAST:
1555                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1556                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1557                 break;
1558         case BONDING_MODE_8023AD:
1559                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1560                         return -1;
1561
1562                 if (internals->mode4.dedicated_queues.enabled == 0) {
1563                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1564                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1565                         RTE_BOND_LOG(WARNING,
1566                                 "Using mode 4, it is necessary to do TX burst "
1567                                 "and RX burst at least every 100ms.");
1568                 } else {
1569                         /* Use flow director's optimization */
1570                         eth_dev->rx_pkt_burst =
1571                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1572                         eth_dev->tx_pkt_burst =
1573                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1574                 }
1575                 break;
1576         case BONDING_MODE_TLB:
1577                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1578                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1579                 break;
1580         case BONDING_MODE_ALB:
1581                 if (bond_mode_alb_enable(eth_dev) != 0)
1582                         return -1;
1583
1584                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1585                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1586                 break;
1587         default:
1588                 return -1;
1589         }
1590
1591         internals->mode = mode;
1592
1593         return 0;
1594 }
1595
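/*
 * Create the per-slave mempool used for LACP control traffic and, when
 * dedicated queues are enabled, set up the extra rx/tx queue pair reserved
 * for it so that LACPDUs do not compete with data-path traffic.
 */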
1597 static int
1598 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1599                 struct rte_eth_dev *slave_eth_dev)
1600 {
1601         int errval = 0;
1602         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1603         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1604
1605         if (port->slow_pool == NULL) {
1606                 char mem_name[256];
1607                 int slave_id = slave_eth_dev->data->port_id;
1608
1609                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1610                                 slave_id);
1611                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1612                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1613                         slave_eth_dev->data->numa_node);
1614
1615                 /* Any memory allocation failure in initialization is critical because
1616                  * resources can't be freed, so reinitialization is impossible. */
1617                 if (port->slow_pool == NULL) {
1618                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1619                                 slave_id, mem_name, rte_strerror(rte_errno));
1620                 }
1621         }
1622
1623         if (internals->mode4.dedicated_queues.enabled == 1) {
1624                 /* Configure slow Rx queue */
1625
1626                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1627                                 internals->mode4.dedicated_queues.rx_qid, 128,
1628                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1629                                 NULL, port->slow_pool);
1630                 if (errval != 0) {
1631                         RTE_BOND_LOG(ERR,
1632                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1633                                         slave_eth_dev->data->port_id,
1634                                         internals->mode4.dedicated_queues.rx_qid,
1635                                         errval);
1636                         return errval;
1637                 }
1638
1639                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1640                                 internals->mode4.dedicated_queues.tx_qid, 512,
1641                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1642                                 NULL);
1643                 if (errval != 0) {
1644                         RTE_BOND_LOG(ERR,
1645                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1646                                 slave_eth_dev->data->port_id,
1647                                 internals->mode4.dedicated_queues.tx_qid,
1648                                 errval);
1649                         return errval;
1650                 }
1651         }
1652         return 0;
1653 }
1654
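/*
 * (Re)configure a slave so that it mirrors the bonded device: stop it,
 * propagate the bond's RSS, VLAN filter and MTU settings, set up its rx/tx
 * queues (plus the mode 4 slow queue and flow rule when dedicated queues
 * are enabled), then restart it, resynchronize its RETA and propagate its
 * initial link status.
 */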
1655 int
1656 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1657                 struct rte_eth_dev *slave_eth_dev)
1658 {
1659         struct bond_rx_queue *bd_rx_q;
1660         struct bond_tx_queue *bd_tx_q;
1661         uint16_t nb_rx_queues;
1662         uint16_t nb_tx_queues;
1663
1664         int errval;
1665         uint16_t q_id;
1666         struct rte_flow_error flow_error;
1667
1668         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1669
1670         /* Stop slave */
1671         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1672
1673         /* Enable interrupts on slave device if supported */
1674         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1675                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1676
1677         /* If RSS is enabled for bonding, try to enable it for slaves */
1678         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1679                 if (internals->rss_key_len != 0) {
1680                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1681                                         internals->rss_key_len;
1682                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1683                                         internals->rss_key;
1684                 } else {
1685                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1686                 }
1687
1688                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1689                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1690                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1691                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1692         }
1693
1694         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1695                         DEV_RX_OFFLOAD_VLAN_FILTER)
1696                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1697                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1698         else
1699                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1700                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1701
1702         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1703         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1704
1705         if (internals->mode == BONDING_MODE_8023AD) {
1706                 if (internals->mode4.dedicated_queues.enabled == 1) {
1707                         nb_rx_queues++;
1708                         nb_tx_queues++;
1709                 }
1710         }
1711
1712         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1713                                      bonded_eth_dev->data->mtu);
1714         if (errval != 0 && errval != -ENOTSUP) {
1715                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1716                                 slave_eth_dev->data->port_id, errval);
1717                 return errval;
1718         }
1719
1720         /* Configure device */
1721         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1722                         nb_rx_queues, nb_tx_queues,
1723                         &(slave_eth_dev->data->dev_conf));
1724         if (errval != 0) {
1725                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1726                                 slave_eth_dev->data->port_id, errval);
1727                 return errval;
1728         }
1729
1730         /* Setup Rx Queues */
1731         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1732                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1733
1734                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1735                                 bd_rx_q->nb_rx_desc,
1736                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1737                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1738                 if (errval != 0) {
1739                         RTE_BOND_LOG(ERR,
1740                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1741                                         slave_eth_dev->data->port_id, q_id, errval);
1742                         return errval;
1743                 }
1744         }
1745
1746         /* Setup Tx Queues */
1747         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1748                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1749
1750                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1751                                 bd_tx_q->nb_tx_desc,
1752                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1753                                 &bd_tx_q->tx_conf);
1754                 if (errval != 0) {
1755                         RTE_BOND_LOG(ERR,
1756                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1757                                 slave_eth_dev->data->port_id, q_id, errval);
1758                         return errval;
1759                 }
1760         }
1761
1762         if (internals->mode == BONDING_MODE_8023AD &&
1763                         internals->mode4.dedicated_queues.enabled == 1) {
1764                 errval = slave_configure_slow_queue(bonded_eth_dev,
1765                                 slave_eth_dev);
1766                 if (errval != 0)
1767                         return errval;
1767
1768                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1769                                 slave_eth_dev->data->port_id) != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "bond_ethdev_8023ad_flow_verify: port=%d, err",
1772                                 slave_eth_dev->data->port_id);
1773                         return -1;
1774                 }
1775
1776                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1777                         rte_flow_destroy(slave_eth_dev->data->port_id,
1778                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1779                                         &flow_error);
1780
1781                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1782                                 slave_eth_dev->data->port_id);
1783         }
1784
1785         /* Start device */
1786         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1787         if (errval != 0) {
1788                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1789                                 slave_eth_dev->data->port_id, errval);
1790                 return -1;
1791         }
1792
1793         /* If RSS is enabled for bonding, synchronize RETA */
1794         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1795                 int i;
1796                 struct bond_dev_private *internals;
1797
1798                 internals = bonded_eth_dev->data->dev_private;
1799
1800                 for (i = 0; i < internals->slave_count; i++) {
1801                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1802                                 errval = rte_eth_dev_rss_reta_update(
1803                                                 slave_eth_dev->data->port_id,
1804                                                 &internals->reta_conf[0],
1805                                                 internals->slaves[i].reta_size);
1806                                 if (errval != 0) {
1807                                         RTE_BOND_LOG(WARNING,
1808                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1809                                                      " RSS Configuration for bonding may be inconsistent.",
1810                                                      slave_eth_dev->data->port_id, errval);
1811                                 }
1812                                 break;
1813                         }
1814                 }
1815         }
1816
1817         /* If lsc interrupt is set, check initial slave's link status */
1818         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1819                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1820                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1821                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1822                         NULL);
1823         }
1824
1825         return 0;
1826 }
1827
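/*
 * Drop a slave from the bookkeeping arrays, compacting both the slave
 * table and the per-flow slave flow arrays, then reset the slave device to
 * force reconfiguration before any future reuse.
 */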
1828 void
1829 slave_remove(struct bond_dev_private *internals,
1830                 struct rte_eth_dev *slave_eth_dev)
1831 {
1832         uint16_t i;
1833
1834         for (i = 0; i < internals->slave_count; i++)
1835                 if (internals->slaves[i].port_id ==
1836                                 slave_eth_dev->data->port_id)
1837                         break;
1838
1839         if (i < (internals->slave_count - 1)) {
1840                 struct rte_flow *flow;
1841
1842                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1843                                 sizeof(internals->slaves[0]) *
1844                                 (internals->slave_count - i - 1));
1845                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1846                         memmove(&flow->flows[i], &flow->flows[i + 1],
1847                                 sizeof(flow->flows[0]) *
1848                                 (internals->slave_count - i - 1));
1849                         flow->flows[internals->slave_count - 1] = NULL;
1850                 }
1851         }
1852
1853         internals->slave_count--;
1854
1855         /* force reconfiguration of slave interfaces */
1856         _rte_eth_dev_reset(slave_eth_dev);
1857 }
1858
1859 static void
1860 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1861
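/*
 * Record a new slave in the slave table and snapshot its current MAC
 * address so it can be restored when the slave leaves the bond.  Slaves
 * without LSC interrupt support are flagged for link status polling.
 */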
1862 void
1863 slave_add(struct bond_dev_private *internals,
1864                 struct rte_eth_dev *slave_eth_dev)
1865 {
1866         struct bond_slave_details *slave_details =
1867                         &internals->slaves[internals->slave_count];
1868
1869         slave_details->port_id = slave_eth_dev->data->port_id;
1870         slave_details->last_link_status = 0;
1871
1872         /* Mark slave devices that don't support interrupts so we can
1873          * compensate when we start the bond
1874          */
1875         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1876                 slave_details->link_status_poll_enabled = 1;
1877         }
1878
1879         slave_details->link_status_wait_to_complete = 0;
1880         /* save the slave's current MAC so it can be restored when removed */
1881         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1882                         sizeof(struct rte_ether_addr));
1883 }
1884
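/*
 * Set the current primary port.  Note that when active slaves exist the
 * proposed port only takes effect if it is found in the active slave list;
 * otherwise the current primary is left unchanged.
 */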
1885 void
1886 bond_ethdev_primary_set(struct bond_dev_private *internals,
1887                 uint16_t slave_port_id)
1888 {
1889         int i;
1890
1891         if (internals->active_slave_count < 1)
1892                 internals->current_primary_port = slave_port_id;
1893         else
1894                 /* Search bonded device slave ports for new proposed primary port */
1895                 for (i = 0; i < internals->active_slave_count; i++) {
1896                         if (internals->active_slaves[i] == slave_port_id)
1897                                 internals->current_primary_port = slave_port_id;
1898                 }
1899 }
1900
1901 static void
1902 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1903
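/*
 * Start the bonded device: derive the bond MAC from the primary slave when
 * the user has not supplied one, reserve the mode 4 dedicated queue ids,
 * reconfigure and start every slave, arm link status polling when any
 * slave needs it, and kick off the mode specific (802.3AD/TLB/ALB)
 * machinery.
 */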
1904 static int
1905 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1906 {
1907         struct bond_dev_private *internals;
1908         int i;
1909
1910         /* slave eth dev will be started by bonded device */
1911         if (check_for_bonded_ethdev(eth_dev)) {
1912                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1913                                 eth_dev->data->port_id);
1914                 return -1;
1915         }
1916
1917         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1918         eth_dev->data->dev_started = 1;
1919
1920         internals = eth_dev->data->dev_private;
1921
1922         if (internals->slave_count == 0) {
1923                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1924                 goto out_err;
1925         }
1926
1927         if (internals->user_defined_mac == 0) {
1928                 struct rte_ether_addr *new_mac_addr = NULL;
1929
1930                 for (i = 0; i < internals->slave_count; i++)
1931                         if (internals->slaves[i].port_id == internals->primary_port)
1932                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1933
1934                 if (new_mac_addr == NULL)
1935                         goto out_err;
1936
1937                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1938                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1939                                         eth_dev->data->port_id);
1940                         goto out_err;
1941                 }
1942         }
1943
1944         if (internals->mode == BONDING_MODE_8023AD) {
1945                 if (internals->mode4.dedicated_queues.enabled == 1) {
1946                         internals->mode4.dedicated_queues.rx_qid =
1947                                         eth_dev->data->nb_rx_queues;
1948                         internals->mode4.dedicated_queues.tx_qid =
1949                                         eth_dev->data->nb_tx_queues;
1950                 }
1951         }
1952
1954         /* Reconfigure each slave device if starting bonded device */
1955         for (i = 0; i < internals->slave_count; i++) {
1956                 struct rte_eth_dev *slave_ethdev =
1957                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1958                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1959                         RTE_BOND_LOG(ERR,
1960                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1961                                 eth_dev->data->port_id,
1962                                 internals->slaves[i].port_id);
1963                         goto out_err;
1964                 }
1965                 /* We will need to poll for link status if any slave doesn't
1966                  * support interrupts
1967                  */
1968                 if (internals->slaves[i].link_status_poll_enabled)
1969                         internals->link_status_polling_enabled = 1;
1970         }
1971
1972         /* start polling if needed */
1973         if (internals->link_status_polling_enabled) {
1974                 rte_eal_alarm_set(
1975                         internals->link_status_polling_interval_ms * 1000,
1976                         bond_ethdev_slave_link_status_change_monitor,
1977                         (void *)&rte_eth_devices[internals->port_id]);
1978         }
1979
1980         /* Update all slave devices' MACs */
1981         if (mac_address_slaves_update(eth_dev) != 0)
1982                 goto out_err;
1983
1984         if (internals->user_defined_primary_port)
1985                 bond_ethdev_primary_set(internals, internals->primary_port);
1986
1987         if (internals->mode == BONDING_MODE_8023AD)
1988                 bond_mode_8023ad_start(eth_dev);
1989
1990         if (internals->mode == BONDING_MODE_TLB ||
1991                         internals->mode == BONDING_MODE_ALB)
1992                 bond_tlb_enable(internals);
1993
1994         return 0;
1995
1996 out_err:
1997         eth_dev->data->dev_started = 0;
1998         return -1;
1999 }
2000
2001 static void
2002 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2003 {
2004         uint16_t i;
2005
2006         if (dev->data->rx_queues != NULL) {
2007                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2008                         rte_free(dev->data->rx_queues[i]);
2009                         dev->data->rx_queues[i] = NULL;
2010                 }
2011                 dev->data->nb_rx_queues = 0;
2012         }
2013
2014         if (dev->data->tx_queues != NULL) {
2015                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2016                         rte_free(dev->data->tx_queues[i]);
2017                         dev->data->tx_queues[i] = NULL;
2018                 }
2019                 dev->data->nb_tx_queues = 0;
2020         }
2021 }
2022
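/*
 * Stop the bonded device: quiesce the mode specific state (drain the mode
 * 4 control rings, clear the TLB byte counters), mark the bonded link
 * down, then stop and deactivate every active slave.
 */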
2023 void
2024 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2025 {
2026         struct bond_dev_private *internals = eth_dev->data->dev_private;
2027         uint16_t i;
2028
2029         if (internals->mode == BONDING_MODE_8023AD) {
2030                 struct port *port;
2031                 void *pkt = NULL;
2032
2033                 bond_mode_8023ad_stop(eth_dev);
2034
2035                 /* Discard all messages to/from mode 4 state machines */
2036                 for (i = 0; i < internals->active_slave_count; i++) {
2037                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2038
2039                         RTE_ASSERT(port->rx_ring != NULL);
2040                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2041                                 rte_pktmbuf_free(pkt);
2042
2043                         RTE_ASSERT(port->tx_ring != NULL);
2044                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2045                                 rte_pktmbuf_free(pkt);
2046                 }
2047         }
2048
2049         if (internals->mode == BONDING_MODE_TLB ||
2050                         internals->mode == BONDING_MODE_ALB) {
2051                 bond_tlb_disable(internals);
2052                 for (i = 0; i < internals->active_slave_count; i++)
2053                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2054         }
2055
2056         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2057         eth_dev->data->dev_started = 0;
2058
2059         internals->link_status_polling_enabled = 0;
2060         for (i = 0; i < internals->slave_count; i++) {
2061                 uint16_t slave_id = internals->slaves[i].port_id;
2062                 if (find_slave_by_id(internals->active_slaves,
2063                                 internals->active_slave_count, slave_id) !=
2064                                                 internals->active_slave_count) {
2065                         internals->slaves[i].last_link_status = 0;
2066                         rte_eth_dev_stop(slave_id);
2067                         deactivate_slave(eth_dev, slave_id);
2068                 }
2069         }
2070 }
2071
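/*
 * Close the bonded device: stop and remove every slave (skipping any that
 * fail to be removed), flush the bond's flow rules and release the queues.
 */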
2072 void
2073 bond_ethdev_close(struct rte_eth_dev *dev)
2074 {
2075         struct bond_dev_private *internals = dev->data->dev_private;
2076         uint16_t bond_port_id = internals->port_id;
2077         int skipped = 0;
2078         struct rte_flow_error ferror;
2079
2080         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2081         while (internals->slave_count != skipped) {
2082                 uint16_t port_id = internals->slaves[skipped].port_id;
2083
2084                 rte_eth_dev_stop(port_id);
2085
2086                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2087                         RTE_BOND_LOG(ERR,
2088                                      "Failed to remove port %d from bonded device %s",
2089                                      port_id, dev->device->name);
2090                         skipped++;
2091                 }
2092         }
2093         bond_flow_ops.flush(dev, &ferror);
2094         bond_ethdev_free_queues(dev);
2095         rte_bitmap_reset(internals->vlan_filter_bmp);
2096 }
2097
2098 /* forward declaration */
2099 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2100
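/*
 * Report device info as the intersection of the slaves' capabilities: the
 * bonded device can only offer the minimum queue counts and descriptor
 * limits among its slaves, minus one queue pair when mode 4 dedicated
 * queues are enabled.
 */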
2101 static void
2102 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2103 {
2104         struct bond_dev_private *internals = dev->data->dev_private;
2105
2106         uint16_t max_nb_rx_queues = UINT16_MAX;
2107         uint16_t max_nb_tx_queues = UINT16_MAX;
2108         uint16_t max_rx_desc_lim = UINT16_MAX;
2109         uint16_t max_tx_desc_lim = UINT16_MAX;
2110
2111         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2112
2113         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2114                         internals->candidate_max_rx_pktlen :
2115                         RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2116
2117         /* The max number of tx/rx queues that the bonded device can support is
2118          * the minimum of the values reported by the slaves, as all slaves must
2119          * be capable of supporting the same number of tx/rx queues.
2120          */
2121         if (internals->slave_count > 0) {
2122                 struct rte_eth_dev_info slave_info;
2123                 uint16_t idx;
2124
2125                 for (idx = 0; idx < internals->slave_count; idx++) {
2126                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2127                                         &slave_info);
2128
2129                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2130                                 max_nb_rx_queues = slave_info.max_rx_queues;
2131
2132                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2133                                 max_nb_tx_queues = slave_info.max_tx_queues;
2134
2135                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2136                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2137
2138                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2139                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2140                 }
2141         }
2142
2143         dev_info->max_rx_queues = max_nb_rx_queues;
2144         dev_info->max_tx_queues = max_nb_tx_queues;
2145
2146         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2147                sizeof(dev_info->default_rxconf));
2148         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2149                sizeof(dev_info->default_txconf));
2150
2151         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2152         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2153
2154         /**
2155          * If dedicated hw queues are enabled for the bonding device in LACP mode
2156          * then we need to reduce the maximum number of data path queues by 1.
2157          */
2158         if (internals->mode == BONDING_MODE_8023AD &&
2159                 internals->mode4.dedicated_queues.enabled == 1) {
2160                 dev_info->max_rx_queues--;
2161                 dev_info->max_tx_queues--;
2162         }
2163
2164         dev_info->min_rx_bufsize = 0;
2165
2166         dev_info->rx_offload_capa = internals->rx_offload_capa;
2167         dev_info->tx_offload_capa = internals->tx_offload_capa;
2168         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2169         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2170         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2171
2172         dev_info->reta_size = internals->reta_size;
2173 }
2174
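/*
 * Apply a VLAN filter setting to every slave, recording it in the bond's
 * VLAN bitmap (presumably so it can be replayed on slaves added later).
 */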
2175 static int
2176 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2177 {
2178         int res;
2179         uint16_t i;
2180         struct bond_dev_private *internals = dev->data->dev_private;
2181
2182         /* don't do this while a slave is being added */
2183         rte_spinlock_lock(&internals->lock);
2184
2185         if (on)
2186                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2187         else
2188                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2189
2190         for (i = 0; i < internals->slave_count; i++) {
2191                 uint16_t port_id = internals->slaves[i].port_id;
2192
2193                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2194                 if (res == -ENOTSUP)
2195                         RTE_BOND_LOG(WARNING,
2196                                      "Setting VLAN filter on slave port %u not supported.",
2197                                      port_id);
2198         }
2199
2200         rte_spinlock_unlock(&internals->lock);
2201         return 0;
2202 }
2203
2204 static int
2205 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2206                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2207                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2208 {
2209         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2210                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2211                                         0, dev->data->numa_node);
2212         if (bd_rx_q == NULL)
2213                 return -1;
2214
2215         bd_rx_q->queue_id = rx_queue_id;
2216         bd_rx_q->dev_private = dev->data->dev_private;
2217
2218         bd_rx_q->nb_rx_desc = nb_rx_desc;
2219
2220         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2221         bd_rx_q->mb_pool = mb_pool;
2222
2223         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2224
2225         return 0;
2226 }
2227
2228 static int
2229 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2230                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2231                 const struct rte_eth_txconf *tx_conf)
2232 {
2233         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2234                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2235                                         0, dev->data->numa_node);
2236
2237         if (bd_tx_q == NULL)
2238                 return -1;
2239
2240         bd_tx_q->queue_id = tx_queue_id;
2241         bd_tx_q->dev_private = dev->data->dev_private;
2242
2243         bd_tx_q->nb_tx_desc = nb_tx_desc;
2244         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2245
2246         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2247
2248         return 0;
2249 }
2250
2251 static void
2252 bond_ethdev_rx_queue_release(void *queue)
2253 {
2254         if (queue == NULL)
2255                 return;
2256
2257         rte_free(queue);
2258 }
2259
2260 static void
2261 bond_ethdev_tx_queue_release(void *queue)
2262 {
2263         if (queue == NULL)
2264                 return;
2265
2266         rte_free(queue);
2267 }
2268
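/*
 * Alarm callback that polls the link status of slaves lacking LSC
 * interrupt support and re-arms itself while any such slave remains; link
 * transitions are funnelled into bond_ethdev_lsc_event_callback() just as
 * if an interrupt had fired.
 */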
2269 static void
2270 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2271 {
2272         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2273         struct bond_dev_private *internals;
2274
2275         /* polling_slave_found defaults to true so that the polling alarm is not
2276          * disabled if we fail to acquire the lock */
2277         int i, polling_slave_found = 1;
2278
2279         if (cb_arg == NULL)
2280                 return;
2281
2282         bonded_ethdev = cb_arg;
2283         internals = bonded_ethdev->data->dev_private;
2284
2285         if (!bonded_ethdev->data->dev_started ||
2286                 !internals->link_status_polling_enabled)
2287                 return;
2288
2289         /* If device is currently being configured then don't check slaves link
2290          * status, wait until next period */
2291         if (rte_spinlock_trylock(&internals->lock)) {
2292                 if (internals->slave_count > 0)
2293                         polling_slave_found = 0;
2294
2295                 for (i = 0; i < internals->slave_count; i++) {
2296                         if (!internals->slaves[i].link_status_poll_enabled)
2297                                 continue;
2298
2299                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2300                         polling_slave_found = 1;
2301
2302                         /* Update slave link status */
2303                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2304                                         internals->slaves[i].link_status_wait_to_complete);
2305
2306                         /* if link status has changed since last checked then call lsc
2307                          * event callback */
2308                         if (slave_ethdev->data->dev_link.link_status !=
2309                                         internals->slaves[i].last_link_status) {
2310                                 internals->slaves[i].last_link_status =
2311                                                 slave_ethdev->data->dev_link.link_status;
2312
2313                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2314                                                 RTE_ETH_EVENT_INTR_LSC,
2315                                                 &bonded_ethdev->data->port_id,
2316                                                 NULL);
2317                         }
2318                 }
2319                 rte_spinlock_unlock(&internals->lock);
2320         }
2321
2322         if (polling_slave_found)
2323                 /* Set alarm to continue monitoring link status of slave ethdev's */
2324                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2325                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2326 }
2327
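/*
 * Derive the bonded link from the active slaves.  Broadcast mode reports
 * the minimum slave speed, active backup reports the current primary's
 * speed, and the remaining modes report the sum of the slave speeds (for
 * example, two active 10G slaves in balance mode yield a 20G bonded link).
 */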
2328 static int
2329 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2330 {
2331         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2332
2333         struct bond_dev_private *bond_ctx;
2334         struct rte_eth_link slave_link;
2335
2336         uint32_t idx;
2337
2338         bond_ctx = ethdev->data->dev_private;
2339
2340         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2341
2342         if (ethdev->data->dev_started == 0 ||
2343                         bond_ctx->active_slave_count == 0) {
2344                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2345                 return 0;
2346         }
2347
2348         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2349
2350         if (wait_to_complete)
2351                 link_update = rte_eth_link_get;
2352         else
2353                 link_update = rte_eth_link_get_nowait;
2354
2355         switch (bond_ctx->mode) {
2356         case BONDING_MODE_BROADCAST:
2357                 /**
2358                  * Setting link speed to UINT32_MAX to ensure we pick up the
2359                  * value of the first active slave
2360                  */
2361                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2362
2363                 /**
2364                  * The link speed is the minimum of all the slaves' link speeds,
2365                  * as packet loss will occur on the slowest slave if transmission
2366                  * at a rate greater than it supports is attempted
2367                  */
2368                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2369                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2370
2371                         if (slave_link.link_speed <
2372                                         ethdev->data->dev_link.link_speed)
2373                                 ethdev->data->dev_link.link_speed =
2374                                                 slave_link.link_speed;
2375                 }
2376                 break;
2377         case BONDING_MODE_ACTIVE_BACKUP:
2378                 /* Current primary slave */
2379                 link_update(bond_ctx->current_primary_port, &slave_link);
2380
2381                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2382                 break;
2383         case BONDING_MODE_8023AD:
2384                 ethdev->data->dev_link.link_autoneg =
2385                                 bond_ctx->mode4.slave_link.link_autoneg;
2386                 ethdev->data->dev_link.link_duplex =
2387                                 bond_ctx->mode4.slave_link.link_duplex;
2388                 /* fall through to update link speed */
2389         case BONDING_MODE_ROUND_ROBIN:
2390         case BONDING_MODE_BALANCE:
2391         case BONDING_MODE_TLB:
2392         case BONDING_MODE_ALB:
2393         default:
2394                 /**
2395                  * In these modes the maximum theoretical link speed is the sum
2396                  * of the link speeds of all the slaves
2397                  */
2398                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2399
2400                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2401                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2402
2403                         ethdev->data->dev_link.link_speed +=
2404                                         slave_link.link_speed;
2405                 }
2406         }
2407
2409         return 0;
2410 }
2411
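/*
 * Aggregate the slave statistics into the bonded device's counters; the
 * per-queue counters are summed by queue index across all slaves.
 */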
2413 static int
2414 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2415 {
2416         struct bond_dev_private *internals = dev->data->dev_private;
2417         struct rte_eth_stats slave_stats;
2418         int i, j;
2419
2420         for (i = 0; i < internals->slave_count; i++) {
2421                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2422
2423                 stats->ipackets += slave_stats.ipackets;
2424                 stats->opackets += slave_stats.opackets;
2425                 stats->ibytes += slave_stats.ibytes;
2426                 stats->obytes += slave_stats.obytes;
2427                 stats->imissed += slave_stats.imissed;
2428                 stats->ierrors += slave_stats.ierrors;
2429                 stats->oerrors += slave_stats.oerrors;
2430                 stats->rx_nombuf += slave_stats.rx_nombuf;
2431
2432                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2433                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2434                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2435                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2436                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2437                         stats->q_errors[j] += slave_stats.q_errors[j];
2438                 }
2439
2440         }
2441
2442         return 0;
2443 }
2444
2445 static void
2446 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2447 {
2448         struct bond_dev_private *internals = dev->data->dev_private;
2449         int i;
2450
2451         for (i = 0; i < internals->slave_count; i++)
2452                 rte_eth_stats_reset(internals->slaves[i].port_id);
2453 }
2454
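/*
 * Promiscuous/allmulticast handling: modes that receive on all slaves
 * propagate the setting to every slave, while the primary based modes
 * touch only the current primary.  On disable, a mode 4 slave whose
 * receive flags were forced by the LACP machinery (forced_rx_flags) is
 * skipped so that LACPDU reception keeps working.
 */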
2455 static void
2456 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2457 {
2458         struct bond_dev_private *internals = eth_dev->data->dev_private;
2459         int i;
2460
2461         switch (internals->mode) {
2462         /* Promiscuous mode is propagated to all slaves */
2463         case BONDING_MODE_ROUND_ROBIN:
2464         case BONDING_MODE_BALANCE:
2465         case BONDING_MODE_BROADCAST:
2466         case BONDING_MODE_8023AD:
2467                 for (i = 0; i < internals->slave_count; i++) {
2468                         uint16_t port_id = internals->slaves[i].port_id;
2469
2470                         rte_eth_promiscuous_enable(port_id);
2471                 }
2472                 break;
2473         /* Promiscuous mode is propagated only to primary slave */
2474         case BONDING_MODE_ACTIVE_BACKUP:
2475         case BONDING_MODE_TLB:
2476         case BONDING_MODE_ALB:
2477         default:
2478                 /* Do not touch promisc when there cannot be primary ports */
2479                 if (internals->slave_count == 0)
2480                         break;
2481                 rte_eth_promiscuous_enable(internals->current_primary_port);
2482         }
2483 }
2484
2485 static void
2486 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2487 {
2488         struct bond_dev_private *internals = dev->data->dev_private;
2489         int i;
2490
2491         switch (internals->mode) {
2492         /* Promiscuous mode is propagated to all slaves */
2493         case BONDING_MODE_ROUND_ROBIN:
2494         case BONDING_MODE_BALANCE:
2495         case BONDING_MODE_BROADCAST:
2496         case BONDING_MODE_8023AD:
2497                 for (i = 0; i < internals->slave_count; i++) {
2498                         uint16_t port_id = internals->slaves[i].port_id;
2499
2500                         if (internals->mode == BONDING_MODE_8023AD &&
2501                             bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2502                                         BOND_8023AD_FORCED_PROMISC)
2503                                 continue;
2504                         rte_eth_promiscuous_disable(port_id);
2505                 }
2506                 break;
2507         /* Promiscuous mode is propagated only to primary slave */
2508         case BONDING_MODE_ACTIVE_BACKUP:
2509         case BONDING_MODE_TLB:
2510         case BONDING_MODE_ALB:
2511         default:
2512                 /* Do not touch promisc when there cannot be primary ports */
2513                 if (internals->slave_count == 0)
2514                         break;
2515                 rte_eth_promiscuous_disable(internals->current_primary_port);
2516         }
2517 }
2518
2519 static void
2520 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2521 {
2522         struct bond_dev_private *internals = eth_dev->data->dev_private;
2523         int i;
2524
2525         switch (internals->mode) {
2526         /* allmulti mode is propagated to all slaves */
2527         case BONDING_MODE_ROUND_ROBIN:
2528         case BONDING_MODE_BALANCE:
2529         case BONDING_MODE_BROADCAST:
2530         case BONDING_MODE_8023AD:
2531                 for (i = 0; i < internals->slave_count; i++) {
2532                         uint16_t port_id = internals->slaves[i].port_id;
2533
2534                         rte_eth_allmulticast_enable(port_id);
2535                 }
2536                 break;
2537         /* allmulti mode is propagated only to primary slave */
2538         case BONDING_MODE_ACTIVE_BACKUP:
2539         case BONDING_MODE_TLB:
2540         case BONDING_MODE_ALB:
2541         default:
2542                 /* Do not touch allmulti when there cannot be primary ports */
2543                 if (internals->slave_count == 0)
2544                         break;
2545                 rte_eth_allmulticast_enable(internals->current_primary_port);
2546         }
2547 }
2548
2549 static void
2550 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2551 {
2552         struct bond_dev_private *internals = eth_dev->data->dev_private;
2553         int i;
2554
2555         switch (internals->mode) {
2556         /* allmulti mode is propagated to all slaves */
2557         case BONDING_MODE_ROUND_ROBIN:
2558         case BONDING_MODE_BALANCE:
2559         case BONDING_MODE_BROADCAST:
2560         case BONDING_MODE_8023AD:
2561                 for (i = 0; i < internals->slave_count; i++) {
2562                         uint16_t port_id = internals->slaves[i].port_id;
2563
2564                         if (internals->mode == BONDING_MODE_8023AD &&
2565                             bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2566                                         BOND_8023AD_FORCED_ALLMULTI)
2567                                 continue;
2568                         rte_eth_allmulticast_disable(port_id);
2569                 }
2570                 break;
2571         /* allmulti mode is propagated only to primary slave */
2572         case BONDING_MODE_ACTIVE_BACKUP:
2573         case BONDING_MODE_TLB:
2574         case BONDING_MODE_ALB:
2575         default:
2576                 /* Do not touch allmulti when there cannot be primary ports */
2577                 if (internals->slave_count == 0)
2578                         break;
2579                 rte_eth_allmulticast_disable(internals->current_primary_port);
2580         }
2581 }
2582
2583 static void
2584 bond_ethdev_delayed_lsc_propagation(void *arg)
2585 {
2586         if (arg == NULL)
2587                 return;
2588
2589         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2590                         RTE_ETH_EVENT_INTR_LSC, NULL);
2591 }
2592
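/*
 * Slave link status change handler.  Under the lsc lock it activates or
 * deactivates the slave, elects a new primary when needed, refreshes the
 * bonded link properties and finally propagates an LSC event for the
 * bonded port itself, optionally after the configured up/down delay.
 */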
2593 int
2594 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2595                 void *param, void *ret_param __rte_unused)
2596 {
2597         struct rte_eth_dev *bonded_eth_dev;
2598         struct bond_dev_private *internals;
2599         struct rte_eth_link link;
2600         int rc = -1;
2601
2602         uint8_t lsc_flag = 0;
2603         int valid_slave = 0;
2604         uint16_t active_pos;
2605         uint16_t i;
2606
2607         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2608                 return rc;
2609
2610         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2611
2612         if (check_for_bonded_ethdev(bonded_eth_dev))
2613                 return rc;
2614
2615         internals = bonded_eth_dev->data->dev_private;
2616
2617         /* If the device isn't started don't handle interrupts */
2618         if (!bonded_eth_dev->data->dev_started)
2619                 return rc;
2620
2621         /* verify that port_id is a valid slave of bonded port */
2622         for (i = 0; i < internals->slave_count; i++) {
2623                 if (internals->slaves[i].port_id == port_id) {
2624                         valid_slave = 1;
2625                         break;
2626                 }
2627         }
2628
2629         if (!valid_slave)
2630                 return rc;
2631
2632         /* Serialize parallel invocations of the lsc callback, whether they
2633          * come from a real link event raised by a slave PMD or from the
2634          * bonding PMD itself.
2634          */
2635         rte_spinlock_lock(&internals->lsc_lock);
2636
2637         /* Search for port in active port list */
2638         active_pos = find_slave_by_id(internals->active_slaves,
2639                         internals->active_slave_count, port_id);
2640
2641         rte_eth_link_get_nowait(port_id, &link);
2642         if (link.link_status) {
2643                 if (active_pos < internals->active_slave_count)
2644                         goto link_update;
2645
2646                 /* check link state properties if bonded link is up */
2647                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2648                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2649                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2650                                              "for slave %d in bonding mode %d",
2651                                              port_id, internals->mode);
2652                 } else {
2653                         /* inherit slave link properties */
2654                         link_properties_set(bonded_eth_dev, &link);
2655                 }
2656
2657                 /* If no active slave ports then set this port to be
2658                  * the primary port.
2659                  */
2660                 if (internals->active_slave_count < 1) {
2661                         /* If first active slave, then change link status */
2662                         bonded_eth_dev->data->dev_link.link_status =
2663                                                                 ETH_LINK_UP;
2664                         internals->current_primary_port = port_id;
2665                         lsc_flag = 1;
2666
2667                         mac_address_slaves_update(bonded_eth_dev);
2668                 }
2669
2670                 activate_slave(bonded_eth_dev, port_id);
2671
2672                 /* If the user has defined the primary port then default to
2673                  * using it.
2674                  */
2675                 if (internals->user_defined_primary_port &&
2676                                 internals->primary_port == port_id)
2677                         bond_ethdev_primary_set(internals, port_id);
2678         } else {
2679                 if (active_pos == internals->active_slave_count)
2680                         goto link_update;
2681
2682                 /* Remove from active slave list */
2683                 deactivate_slave(bonded_eth_dev, port_id);
2684
2685                 if (internals->active_slave_count < 1)
2686                         lsc_flag = 1;
2687
2688                 /* Update primary id: take the first active slave from the list,
2689                  * or fall back to the configured primary port if none is active. */
2690                 if (port_id == internals->current_primary_port) {
2691                         if (internals->active_slave_count > 0)
2692                                 bond_ethdev_primary_set(internals,
2693                                                 internals->active_slaves[0]);
2694                         else
2695                                 internals->current_primary_port = internals->primary_port;
2696                 }
2697         }
2698
2699 link_update:
2700         /**
2701          * Update bonded device link properties after any change to active
2702          * slaves
2703          */
2704         bond_ethdev_link_update(bonded_eth_dev, 0);
2705
2706         if (lsc_flag) {
2707                 /* Cancel any possible outstanding interrupts if delays are enabled */
2708                 if (internals->link_up_delay_ms > 0 ||
2709                         internals->link_down_delay_ms > 0)
2710                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2711                                         bonded_eth_dev);
2712
2713                 if (bonded_eth_dev->data->dev_link.link_status) {
2714                         if (internals->link_up_delay_ms > 0)
2715                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2716                                                 bond_ethdev_delayed_lsc_propagation,
2717                                                 (void *)bonded_eth_dev);
2718                         else
2719                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2720                                                 RTE_ETH_EVENT_INTR_LSC,
2721                                                 NULL);
2722
2723                 } else {
2724                         if (internals->link_down_delay_ms > 0)
2725                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2726                                                 bond_ethdev_delayed_lsc_propagation,
2727                                                 (void *)bonded_eth_dev);
2728                         else
2729                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2730                                                 RTE_ETH_EVENT_INTR_LSC,
2731                                                 NULL);
2732                 }
2733         }
2734
2735         rte_spinlock_unlock(&internals->lsc_lock);
2736
2737         return rc;
2738 }
2739
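/*
 * RETA update: the bonded device stores one canonical redirection table,
 * replicates the first reta_size entries across the rest of its array,
 * and then pushes the table to every slave using that slave's own
 * RETA size.
 */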
2740 static int
2741 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2742                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2743 {
2744         unsigned i, j;
2745         int result = 0;
2746         int slave_reta_size;
2747         unsigned reta_count;
2748         struct bond_dev_private *internals = dev->data->dev_private;
2749
2750         if (reta_size != internals->reta_size)
2751                 return -EINVAL;
2752
2753         /* Copy RETA table */
2754         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2755
2756         for (i = 0; i < reta_count; i++) {
2757                 internals->reta_conf[i].mask = reta_conf[i].mask;
2758                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2759                         if ((reta_conf[i].mask >> j) & 0x01)
2760                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2761         }
2762
2763         /* Fill rest of array */
2764         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2765                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2766                                 sizeof(internals->reta_conf[0]) * reta_count);
2767
2768         /* Propagate RETA over slaves */
2769         for (i = 0; i < internals->slave_count; i++) {
2770                 slave_reta_size = internals->slaves[i].reta_size;
2771                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2772                                 &internals->reta_conf[0], slave_reta_size);
2773                 if (result < 0)
2774                         return result;
2775         }
2776
2777         return 0;
2778 }
2779
2780 static int
2781 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2782                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2783 {
2784         int i, j;
2785         struct bond_dev_private *internals = dev->data->dev_private;
2786
2787         if (reta_size != internals->reta_size)
2788                 return -EINVAL;
2789
2790         /* Copy RETA table */
2791         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2792                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2793                         if ((reta_conf[i].mask >> j) & 0x01)
2794                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2795
2796         return 0;
2797 }
2798
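/*
 * RSS hash update: the requested hash functions are masked against the
 * offload types supported by all slaves, and the key (when given) is
 * cached in the private data so it can be re-applied to slaves added
 * later; both are then pushed to every current slave.
 */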
2799 static int
2800 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2801                 struct rte_eth_rss_conf *rss_conf)
2802 {
2803         int i, result = 0;
2804         struct bond_dev_private *internals = dev->data->dev_private;
2805         struct rte_eth_rss_conf bond_rss_conf;
2806
2807         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2808
2809         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2810
2811         if (bond_rss_conf.rss_hf != 0)
2812                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2813
2814         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2815                         sizeof(internals->rss_key)) {
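                /* A key length of 0 falls back to the conventional 40-byte RSS key */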
2816                 if (bond_rss_conf.rss_key_len == 0)
2817                         bond_rss_conf.rss_key_len = 40;
2818                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2819                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2820                                 internals->rss_key_len);
2821         }
2822
2823         for (i = 0; i < internals->slave_count; i++) {
2824                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2825                                 &bond_rss_conf);
2826                 if (result < 0)
2827                         return result;
2828         }
2829
2830         return 0;
2831 }
2832
2833 static int
2834 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2835                 struct rte_eth_rss_conf *rss_conf)
2836 {
2837         struct bond_dev_private *internals = dev->data->dev_private;
2838
2839         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2840         rss_conf->rss_key_len = internals->rss_key_len;
2841         if (rss_conf->rss_key)
2842                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2843
2844         return 0;
2845 }
2846
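/*
 * MTU propagation is done in two passes: first verify that every slave
 * implements mtu_set, so the operation cannot fail half-way through on an
 * unsupported port, then apply the new MTU to each slave in turn.
 */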
2847 static int
2848 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2849 {
2850         struct rte_eth_dev *slave_eth_dev;
2851         struct bond_dev_private *internals = dev->data->dev_private;
2852         int ret, i;
2853
2854         rte_spinlock_lock(&internals->lock);
2855
2856         for (i = 0; i < internals->slave_count; i++) {
2857                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2858                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2859                         rte_spinlock_unlock(&internals->lock);
2860                         return -ENOTSUP;
2861                 }
2862         }
2863         for (i = 0; i < internals->slave_count; i++) {
2864                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2865                 if (ret < 0) {
2866                         rte_spinlock_unlock(&internals->lock);
2867                         return ret;
2868                 }
2869         }
2870
2871         rte_spinlock_unlock(&internals->lock);
2872         return 0;
2873 }
2874
2875 static int
2876 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2877                         struct rte_ether_addr *addr)
2878 {
2879         if (mac_address_set(dev, addr)) {
2880                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2881                 return -EINVAL;
2882         }
2883
2884         return 0;
2885 }
2886
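/*
 * Generic filter control: only the GENERIC/GET combination is supported,
 * handing back the bonding PMD's rte_flow ops; everything else is
 * rejected with -ENOTSUP.
 */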
2887 static int
2888 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2889                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2890 {
2891         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2892                 *(const void **)arg = &bond_flow_ops;
2893                 return 0;
2894         }
2895         return -ENOTSUP;
2896 }
2897
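/*
 * Secondary MAC addresses are programmed on every slave. If any slave
 * rejects the address, the partial configuration is rolled back by
 * removing it from the slaves already updated.
 */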
2898 static int
2899 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2900                         struct rte_ether_addr *mac_addr,
2901                         __rte_unused uint32_t index, uint32_t vmdq)
2902 {
2903         struct rte_eth_dev *slave_eth_dev;
2904         struct bond_dev_private *internals = dev->data->dev_private;
2905         int ret, i;
2906
2907         rte_spinlock_lock(&internals->lock);
2908
2909         for (i = 0; i < internals->slave_count; i++) {
2910                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2911                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2912                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2913                         ret = -ENOTSUP;
2914                         goto end;
2915                 }
2916         }
2917
2918         for (i = 0; i < internals->slave_count; i++) {
2919                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2920                                 mac_addr, vmdq);
2921                 if (ret < 0) {
2922                         /* rollback */
2923                         for (i--; i >= 0; i--)
2924                                 rte_eth_dev_mac_addr_remove(
2925                                         internals->slaves[i].port_id, mac_addr);
2926                         goto end;
2927                 }
2928         }
2929
2930         ret = 0;
2931 end:
2932         rte_spinlock_unlock(&internals->lock);
2933         return ret;
2934 }
2935
2936 static void
2937 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2938 {
2939         struct rte_eth_dev *slave_eth_dev;
2940         struct bond_dev_private *internals = dev->data->dev_private;
2941         int i;
2942
2943         rte_spinlock_lock(&internals->lock);
2944
2945         for (i = 0; i < internals->slave_count; i++) {
2946                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2947                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2948                         goto end;
2949         }
2950
2951         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2952
2953         for (i = 0; i < internals->slave_count; i++)
2954                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2955                                 mac_addr);
2956
2957 end:
2958         rte_spinlock_unlock(&internals->lock);
2959 }
2960
2961 const struct eth_dev_ops default_dev_ops = {
2962         .dev_start            = bond_ethdev_start,
2963         .dev_stop             = bond_ethdev_stop,
2964         .dev_close            = bond_ethdev_close,
2965         .dev_configure        = bond_ethdev_configure,
2966         .dev_infos_get        = bond_ethdev_info,
2967         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2968         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2969         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2970         .rx_queue_release     = bond_ethdev_rx_queue_release,
2971         .tx_queue_release     = bond_ethdev_tx_queue_release,
2972         .link_update          = bond_ethdev_link_update,
2973         .stats_get            = bond_ethdev_stats_get,
2974         .stats_reset          = bond_ethdev_stats_reset,
2975         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2976         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2977         .allmulticast_enable  = bond_ethdev_allmulticast_enable,
2978         .allmulticast_disable = bond_ethdev_allmulticast_disable,
2979         .reta_update          = bond_ethdev_rss_reta_update,
2980         .reta_query           = bond_ethdev_rss_reta_query,
2981         .rss_hash_update      = bond_ethdev_rss_hash_update,
2982         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2983         .mtu_set              = bond_ethdev_mtu_set,
2984         .mac_addr_set         = bond_ethdev_mac_address_set,
2985         .mac_addr_add         = bond_ethdev_mac_addr_add,
2986         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
2987         .filter_ctrl          = bond_filter_ctrl
2988 };
2989
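/*
 * Allocate and initialize a bonded ethdev: reserve the ethdev entry and
 * its MAC address table, set mode-independent defaults, apply the
 * requested bonding mode (with mode 4 defaults pre-loaded), and build the
 * VLAN filter bitmap. Returns the new port id, or -1 on failure.
 */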
2990 static int
2991 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2992 {
2993         const char *name = rte_vdev_device_name(dev);
2994         uint8_t socket_id = dev->device.numa_node;
2995         struct bond_dev_private *internals = NULL;
2996         struct rte_eth_dev *eth_dev = NULL;
2997         uint32_t vlan_filter_bmp_size;
2998
2999         /* now do all data allocation - for the eth_dev structure
3000          * and the internal (private) data
3001          */
3002
3003         /* reserve an ethdev entry */
3004         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3005         if (eth_dev == NULL) {
3006                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3007                 goto err;
3008         }
3009
3010         internals = eth_dev->data->dev_private;
3011         eth_dev->data->nb_rx_queues = (uint16_t)1;
3012         eth_dev->data->nb_tx_queues = (uint16_t)1;
3013
3014         /* Allocate memory for storing MAC addresses */
3015         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3016                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3017         if (eth_dev->data->mac_addrs == NULL) {
3018                 RTE_BOND_LOG(ERR,
3019                              "Failed to allocate %u bytes needed to store MAC addresses",
3020                              RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3021                 goto err;
3022         }
3023
3024         eth_dev->dev_ops = &default_dev_ops;
3025         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3026
3027         rte_spinlock_init(&internals->lock);
3028         rte_spinlock_init(&internals->lsc_lock);
3029
3030         internals->port_id = eth_dev->data->port_id;
3031         internals->mode = BONDING_MODE_INVALID;
3032         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3033         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3034         internals->burst_xmit_hash = burst_xmit_l2_hash;
3035         internals->user_defined_mac = 0;
3036
3037         internals->link_status_polling_enabled = 0;
3038
3039         internals->link_status_polling_interval_ms =
3040                 DEFAULT_POLLING_INTERVAL_10_MS;
3041         internals->link_down_delay_ms = 0;
3042         internals->link_up_delay_ms = 0;
3043
3044         internals->slave_count = 0;
3045         internals->active_slave_count = 0;
3046         internals->rx_offload_capa = 0;
3047         internals->tx_offload_capa = 0;
3048         internals->rx_queue_offload_capa = 0;
3049         internals->tx_queue_offload_capa = 0;
3050         internals->candidate_max_rx_pktlen = 0;
3051         internals->max_rx_pktlen = 0;
3052
3053         /* Initially allow any RSS offload type to be chosen */
3054         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3055
3056         memset(&internals->default_rxconf, 0,
3057                sizeof(internals->default_rxconf));
3058         memset(&internals->default_txconf, 0,
3059                sizeof(internals->default_txconf));
3060
3061         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3062         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3063
3064         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3065         memset(internals->slaves, 0, sizeof(internals->slaves));
3066
3067         TAILQ_INIT(&internals->flow_list);
3068         internals->flow_isolated_valid = 0;
3069
3070         /* Set mode 4 default configuration */
3071         bond_mode_8023ad_setup(eth_dev, NULL);
3072         if (bond_ethdev_mode_set(eth_dev, mode)) {
3073                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3074                                  eth_dev->data->port_id, mode);
3075                 goto err;
3076         }
3077
3078         vlan_filter_bmp_size =
3079                 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3080         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3081                                                    RTE_CACHE_LINE_SIZE);
3082         if (internals->vlan_filter_bmpmem == NULL) {
3083                 RTE_BOND_LOG(ERR,
3084                              "Failed to allocate vlan bitmap for bonded device %u",
3085                              eth_dev->data->port_id);
3086                 goto err;
3087         }
3088
3089         internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3090                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3091         if (internals->vlan_filter_bmp == NULL) {
3092                 RTE_BOND_LOG(ERR,
3093                              "Failed to init vlan bitmap for bonded device %u",
3094                              eth_dev->data->port_id);
3095                 rte_free(internals->vlan_filter_bmpmem);
3096                 goto err;
3097         }
3098
3099         return eth_dev->data->port_id;
3100
3101 err:
3102         rte_free(internals);
3103         if (eth_dev != NULL)
3104                 eth_dev->data->dev_private = NULL;
3105         rte_eth_dev_release_port(eth_dev);
3106         return -1;
3107 }
3108
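/*
 * vdev probe entry point: a secondary process simply attaches to the
 * existing ethdev, while the primary process parses the mandatory bonding
 * mode and the optional socket id from the kvargs before allocating the
 * device. The remaining kvargs are saved and consumed at configure time.
 */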
3109 static int
3110 bond_probe(struct rte_vdev_device *dev)
3111 {
3112         const char *name;
3113         struct bond_dev_private *internals;
3114         struct rte_kvargs *kvlist;
3115         uint8_t bonding_mode, socket_id;
3116         int  arg_count, port_id;
3117         uint8_t agg_mode;
3118         struct rte_eth_dev *eth_dev;
3119
3120         if (!dev)
3121                 return -EINVAL;
3122
3123         name = rte_vdev_device_name(dev);
3124         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3125
3126         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3127                 eth_dev = rte_eth_dev_attach_secondary(name);
3128                 if (!eth_dev) {
3129                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3130                         return -1;
3131                 }
3132                 /* TODO: request info from primary to set up Rx and Tx */
3133                 eth_dev->dev_ops = &default_dev_ops;
3134                 eth_dev->device = &dev->device;
3135                 rte_eth_dev_probing_finish(eth_dev);
3136                 return 0;
3137         }
3138
3139         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3140                 pmd_bond_init_valid_arguments);
3141         if (kvlist == NULL)
3142                 return -1;
3143
3144         /* Parse link bonding mode */
3145         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3146                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3147                                 &bond_ethdev_parse_slave_mode_kvarg,
3148                                 &bonding_mode) != 0) {
3149                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3150                                         name);
3151                         goto parse_error;
3152                 }
3153         } else {
3154                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3155                                 "device %s", name);
3156                 goto parse_error;
3157         }
3158
3159         /* Parse socket id to create bonding device on */
3160         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3161         if (arg_count == 1) {
3162                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3163                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3164                                 != 0) {
3165                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3166                                         "bonded device %s", name);
3167                         goto parse_error;
3168                 }
3169         } else if (arg_count > 1) {
3170                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3171                                 "bonded device %s", name);
3172                 goto parse_error;
3173         } else {
3174                 socket_id = rte_socket_id();
3175         }
3176
3177         dev->device.numa_node = socket_id;
3178
3179         /* Create link bonding eth device */
3180         port_id = bond_alloc(dev, bonding_mode);
3181         if (port_id < 0) {
3182                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3183                                 "socket %u.", name, bonding_mode, socket_id);
3184                 goto parse_error;
3185         }
3186         internals = rte_eth_devices[port_id].data->dev_private;
3187         internals->kvlist = kvlist;
3188
3189         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3190                 if (rte_kvargs_process(kvlist,
3191                                 PMD_BOND_AGG_MODE_KVARG,
3192                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3193                                 &agg_mode) != 0) {
3194                         RTE_BOND_LOG(ERR,
3195                                         "Failed to parse agg selection mode for bonded device %s",
3196                                         name);
3197                         goto parse_error;
3198                 }
3199
3200                 if (internals->mode == BONDING_MODE_8023AD)
3201                         internals->mode4.agg_selection = agg_mode;
3202         } else {
3203                 internals->mode4.agg_selection = AGG_STABLE;
3204         }
3205
3206         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3207         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3208                         "socket %u.", name, port_id, bonding_mode, socket_id);
3209         return 0;
3210
3211 parse_error:
3212         rte_kvargs_free(kvlist);
3213
3214         return -1;
3215 }
3216
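/*
 * vdev remove entry point: refuses to tear the device down while slaves
 * are still attached (-EBUSY), stops and closes a running device, and
 * releases the mode 6 mempool, the VLAN filter bitmap and the ethdev port.
 */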
3217 static int
3218 bond_remove(struct rte_vdev_device *dev)
3219 {
3220         struct rte_eth_dev *eth_dev;
3221         struct bond_dev_private *internals;
3222         const char *name;
3223
3224         if (!dev)
3225                 return -EINVAL;
3226
3227         name = rte_vdev_device_name(dev);
3228         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3229
3230         /* now free all data allocation - for the eth_dev structure
3231          * and the internal (private) data
3232          */
3233
3234         /* find an ethdev entry */
3235         eth_dev = rte_eth_dev_allocated(name);
3236         if (eth_dev == NULL)
3237                 return -ENODEV;
3238
3239         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3240                 return rte_eth_dev_release_port(eth_dev);
3241
3242         RTE_ASSERT(eth_dev->device == &dev->device);
3243
3244         internals = eth_dev->data->dev_private;
3245         if (internals->slave_count != 0)
3246                 return -EBUSY;
3247
3248         if (eth_dev->data->dev_started == 1) {
3249                 bond_ethdev_stop(eth_dev);
3250                 bond_ethdev_close(eth_dev);
3251         }
3252
3253         eth_dev->dev_ops = NULL;
3254         eth_dev->rx_pkt_burst = NULL;
3255         eth_dev->tx_pkt_burst = NULL;
3256
3257         internals = eth_dev->data->dev_private;
3258         /* Try to release the mempool used in mode 6. If the bonded
3259          * device is not in mode 6, freeing a NULL pointer is not a problem.
3260          */
3261         rte_mempool_free(internals->mode6.mempool);
3262         rte_bitmap_free(internals->vlan_filter_bmp);
3263         rte_free(internals->vlan_filter_bmpmem);
3264
3265         rte_eth_dev_release_port(eth_dev);
3266
3267         return 0;
3268 }
3269
3270 /* This part resolves the slave port ids after all the other physical and
3271  * virtual devices have been allocated. */
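/*
 * Illustrative sketch, not part of the driver (slave_port_id is a
 * placeholder for a valid slave port): a bonded device can also be created
 * at runtime through the bonding API, in which case no kvlist is attached
 * and the kvargs handling below is skipped:
 *
 *   int bond_port = rte_eth_bond_create("net_bonding0",
 *                                       BONDING_MODE_ACTIVE_BACKUP,
 *                                       rte_socket_id());
 *   if (bond_port >= 0) {
 *       rte_eth_bond_slave_add(bond_port, slave_port_id);
 *       rte_eth_bond_primary_set(bond_port, slave_port_id);
 *   }
 */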
3272 static int
3273 bond_ethdev_configure(struct rte_eth_dev *dev)
3274 {
3275         const char *name = dev->device->name;
3276         struct bond_dev_private *internals = dev->data->dev_private;
3277         struct rte_kvargs *kvlist = internals->kvlist;
3278         int arg_count;
3279         uint16_t port_id = dev - rte_eth_devices;
3280         uint8_t agg_mode;
3281
3282         static const uint8_t default_rss_key[40] = {
3283                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3284                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3285                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3286                 0xBE, 0xAC, 0x01, 0xFA
3287         };
3288
3289         unsigned i, j;
3290
3291         /*
3292          * If RSS is enabled, fill the table with default values and
3293          * set the key to the value specified in the port RSS configuration.
3294          * Fall back to the default RSS key if no key is specified.
3295          */
3296         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3297                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3298                         internals->rss_key_len =
3299                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3300                         memcpy(internals->rss_key,
3301                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3302                                internals->rss_key_len);
3303                 } else {
3304                         internals->rss_key_len = sizeof(default_rss_key);
3305                         memcpy(internals->rss_key, default_rss_key,
3306                                internals->rss_key_len);
3307                 }
3308
3309                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3310                         internals->reta_conf[i].mask = ~0LL;
3311                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3312                                 internals->reta_conf[i].reta[j] =
3313                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3314                                                 dev->data->nb_rx_queues;
3315                 }
3316         }
3317
3318         /* set the max_rx_pktlen */
3319         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3320
3321         /*
3322          * If there is no kvlist, this bonded device was created
3323          * through the bonding API.
3324          */
3325         if (!kvlist)
3326                 return 0;
3327
3328         /* Parse MAC address for bonded device */
3329         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3330         if (arg_count == 1) {
3331                 struct rte_ether_addr bond_mac;
3332
3333                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3334                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3335                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3336                                      name);
3337                         return -1;
3338                 }
3339
3340                 /* Set MAC address */
3341                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3342                         RTE_BOND_LOG(ERR,
3343                                      "Failed to set mac address on bonded device %s",
3344                                      name);
3345                         return -1;
3346                 }
3347         } else if (arg_count > 1) {
3348                 RTE_BOND_LOG(ERR,
3349                              "MAC address can be specified only once for bonded device %s",
3350                              name);
3351                 return -1;
3352         }
3353
3354         /* Parse/set balance mode transmit policy */
3355         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3356         if (arg_count == 1) {
3357                 uint8_t xmit_policy;
3358
3359                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3360                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3361                     0) {
3362                         RTE_BOND_LOG(INFO,
3363                                      "Invalid xmit policy specified for bonded device %s",
3364                                      name);
3365                         return -1;
3366                 }
3367
3368                 /* Set the balance mode transmit policy */
3369                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3370                         RTE_BOND_LOG(ERR,
3371                                      "Failed to set balance xmit policy on bonded device %s",
3372                                      name);
3373                         return -1;
3374                 }
3375         } else if (arg_count > 1) {
3376                 RTE_BOND_LOG(ERR,
3377                              "Transmit policy can be specified only once for bonded device %s",
3378                              name);
3379                 return -1;
3380         }
3381
3382         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3383                 if (rte_kvargs_process(kvlist,
3384                                        PMD_BOND_AGG_MODE_KVARG,
3385                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3386                                        &agg_mode) != 0) {
3387                         RTE_BOND_LOG(ERR,
3388                                      "Failed to parse agg selection mode for bonded device %s",
3389                                      name);
3390                 }
3391                 if (internals->mode == BONDING_MODE_8023AD) {
3392                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3393                                         agg_mode);
3394                         if (ret < 0) {
3395                                 RTE_BOND_LOG(ERR,
3396                                         "Invalid args for agg selection set for bonded device %s",
3397                                         name);
3398                                 return -1;
3399                         }
3400                 }
3401         }
3402
3403         /* Parse/add slave ports to bonded device */
3404         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3405                 struct bond_ethdev_slave_ports slave_ports;
3406                 unsigned i;
3407
3408                 memset(&slave_ports, 0, sizeof(slave_ports));
3409
3410                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3411                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3412                         RTE_BOND_LOG(ERR,
3413                                      "Failed to parse slave ports for bonded device %s",
3414                                      name);
3415                         return -1;
3416                 }
3417
3418                 for (i = 0; i < slave_ports.slave_count; i++) {
3419                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3420                                 RTE_BOND_LOG(ERR,
3421                                              "Failed to add port %d as slave to bonded device %s",
3422                                              slave_ports.slaves[i], name);
3423                         }
3424                 }
3425
3426         } else {
3427                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3428                 return -1;
3429         }
3430
3431         /* Parse/set primary slave port id*/
3432         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3433         if (arg_count == 1) {
3434                 uint16_t primary_slave_port_id;
3435
3436                 if (rte_kvargs_process(kvlist,
3437                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3438                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3439                                        &primary_slave_port_id) < 0) {
3440                         RTE_BOND_LOG(INFO,
3441                                      "Invalid primary slave port id specified for bonded device %s",
3442                                      name);
3443                         return -1;
3444                 }
3445
3446                 /* Set the primary slave port id */
3447                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3448                     != 0) {
3449                         RTE_BOND_LOG(ERR,
3450                                      "Failed to set primary slave port %d on bonded device %s",
3451                                      primary_slave_port_id, name);
3452                         return -1;
3453                 }
3454         } else if (arg_count > 1) {
3455                 RTE_BOND_LOG(INFO,
3456                              "Primary slave can be specified only once for bonded device %s",
3457                              name);
3458                 return -1;
3459         }
3460
3461         /* Parse link status monitor polling interval */
3462         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3463         if (arg_count == 1) {
3464                 uint32_t lsc_poll_interval_ms;
3465
3466                 if (rte_kvargs_process(kvlist,
3467                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3468                                        &bond_ethdev_parse_time_ms_kvarg,
3469                                        &lsc_poll_interval_ms) < 0) {
3470                         RTE_BOND_LOG(INFO,
3471                                      "Invalid lsc polling interval value specified for bonded"
3472                                      " device %s", name);
3473                         return -1;
3474                 }
3475
3476                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3477                     != 0) {
3478                         RTE_BOND_LOG(ERR,
3479                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3480                                      lsc_poll_interval_ms, name);
3481                         return -1;
3482                 }
3483         } else if (arg_count > 1) {
3484                 RTE_BOND_LOG(INFO,
3485                              "LSC polling interval can be specified only once for bonded"
3486                              " device %s", name);
3487                 return -1;
3488         }
3489
3490         /* Parse link up interrupt propagation delay */
3491         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3492         if (arg_count == 1) {
3493                 uint32_t link_up_delay_ms;
3494
3495                 if (rte_kvargs_process(kvlist,
3496                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3497                                        &bond_ethdev_parse_time_ms_kvarg,
3498                                        &link_up_delay_ms) < 0) {
3499                         RTE_BOND_LOG(INFO,
3500                                      "Invalid link up propagation delay value specified for"
3501                                      " bonded device %s", name);
3502                         return -1;
3503                 }
3504
3505                 /* Set the link up propagation delay */
3506                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3507                     != 0) {
3508                         RTE_BOND_LOG(ERR,
3509                                      "Failed to set link up propagation delay (%u ms) on bonded"
3510                                      " device %s", link_up_delay_ms, name);
3511                         return -1;
3512                 }
3513         } else if (arg_count > 1) {
3514                 RTE_BOND_LOG(INFO,
3515                              "Link up propagation delay can be specified only once for"
3516                              " bonded device %s", name);
3517                 return -1;
3518         }
3519
3520         /* Parse link down interrupt propagation delay */
3521         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3522         if (arg_count == 1) {
3523                 uint32_t link_down_delay_ms;
3524
3525                 if (rte_kvargs_process(kvlist,
3526                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3527                                        &bond_ethdev_parse_time_ms_kvarg,
3528                                        &link_down_delay_ms) < 0) {
3529                         RTE_BOND_LOG(INFO,
3530                                      "Invalid link down propagation delay value specified for"
3531                                      " bonded device %s", name);
3532                         return -1;
3533                 }
3534
3535                 /* Set the link down propagation delay */
3536                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3537                     != 0) {
3538                         RTE_BOND_LOG(ERR,
3539                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3540                                      link_down_delay_ms, name);
3541                         return -1;
3542                 }
3543         } else if (arg_count > 1) {
3544                 RTE_BOND_LOG(INFO,
3545                              "Link down propagation delay can be specified only once for bonded device %s",
3546                              name);
3547                 return -1;
3548         }
3549
3550         return 0;
3551 }
3552
3553 struct rte_vdev_driver pmd_bond_drv = {
3554         .probe = bond_probe,
3555         .remove = bond_remove,
3556 };
3557
3558 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3559 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3560
3561 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3562         "slave=<ifc> "
3563         "primary=<ifc> "
3564         "mode=[0-6] "
3565         "xmit_policy=[l2 | l23 | l34] "
3566         "agg_mode=[count | stable | bandwidth] "
3567         "socket_id=<int> "
3568         "mac=<mac addr> "
3569         "lsc_poll_period_ms=<int> "
3570         "up_delay=<int> "
3571         "down_delay=<int>");
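/*
 * Example invocation (hypothetical PCI addresses), creating an
 * active-backup bond over two slaves from the EAL command line:
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:04:00.0,slave=0000:04:00.1,
 *           primary=0000:04:00.0,up_delay=500,down_delay=500'
 */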
3572
3573 int bond_logtype;
3574
3575 RTE_INIT(bond_init_log)
3576 {
3577         bond_logtype = rte_log_register("pmd.net.bond");
3578         if (bond_logtype >= 0)
3579                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3580 }