net/bonding: use evenly distributed default RSS RETA
dpdk.git: drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

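/*
 * Fold the L4 source and destination ports into a single value; the XOR is
 * symmetric, so both directions of a flow produce the same hash input.
 */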
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

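/*
 * Return the number of bytes of VLAN tagging (zero, one or two VLAN headers)
 * following the Ethernet header, advancing *proto to the encapsulated
 * EtherType.
 */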
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

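/*
 * Generic Rx burst: poll each active slave in order, filling bufs until
 * nb_pkts packets have been received or every slave has been drained.
 */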
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

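/*
 * Return non-zero for untagged IEEE 802.3ad slow-protocol frames (LACPDUs
 * and marker PDUs), which must go to the mode 4 state machine rather than
 * to the application.
 */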
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};
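
/*
 * Together these describe a single ingress rule matching EtherType 0x8809
 * (slow protocols); one such rule is installed per slave to steer LACP
 * traffic to the dedicated control queue.
 */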

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities do not allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

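/*
 * Install the slow-protocol flow rule on a slave port, steering received
 * LACP frames to the dedicated Rx queue reserved behind the queues visible
 * to the application.
 */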
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during
         * Rx bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

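/*
 * Tx burst for mode 4 with dedicated control queues: data packets are hashed
 * across the slaves currently in DISTRIBUTING state; LACP control traffic is
 * handled on the dedicated queues instead of this path.
 */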
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves;
         * one entry per mbuf, so it must be nb_bufs long, not
         * RTE_MAX_ETHPORTS long */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave with a hash based on the
         * xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during
         * Rx bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If the packet type is known and not pure L2, it
                         * cannot be a slow protocol frame; skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Update the RX or TX packet count for this known client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* New client: add it to the table, if there is room, and count the packet */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets round-robin across the slave mbuf arrays */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

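/*
 * Transmit hash policies: each routine computes a per-packet hash and writes
 * the chosen slave index (hash % slave_count) into slaves[]. The L2 policy
 * hashes only the MAC addresses, L23 additionally folds in the IPv4/IPv6
 * addresses, and L34 also folds in the TCP/UDP ports.
 */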
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

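/*
 * These three routines back the BALANCE_XMIT_POLICY_LAYER2, _LAYER23 and
 * _LAYER34 settings; an application would typically select one on the bonded
 * port with, for example:
 *
 *     rte_eth_bond_xmit_policy_set(bonded_port_id, BALANCE_XMIT_POLICY_LAYER34);
 */
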
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

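/*
 * Estimate the bandwidth left unused on a slave over the current measurement
 * window and store it in a bwg_slave entry (integer part plus remainder, so
 * ties can be broken precisely). link_speed is reported in Mbps, hence the
 * * 1000000 / 8 conversion to bytes per second.
 */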
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

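/*
 * Periodic alarm callback for TLB mode: sample each active slave's Tx byte
 * counter, compute its remaining bandwidth, and re-sort tlb_slaves_order so
 * the least loaded slave is tried first. Re-arms itself every
 * REORDER_PERIOD_MS.
 */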
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

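/*
 * Tx burst for mode 6 (adaptive load balancing): ARP packets are placed on
 * the slave chosen by the ALB client table, pending ARP update packets are
 * flushed, and all other traffic falls back to the TLB transmit policy.
 */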
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

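/*
 * Tx burst for balance mode (mode 2): distribute the packets across all
 * active slaves using the configured transmit hash policy.
 */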
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave with a hash based on the
         * xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

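/*
 * Tx burst for mode 4 without dedicated control queues: data packets are
 * hashed across the slaves in DISTRIBUTING state, and any LACPDUs queued by
 * the 802.3ad state machine are drained from each slave's tx_ring in-line.
 */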
1259 static uint16_t
1260 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1261                 uint16_t nb_bufs)
1262 {
1263         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1264         struct bond_dev_private *internals = bd_tx_q->dev_private;
1265
1266         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1267         uint16_t slave_count;
1268
1269         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1270         uint16_t dist_slave_count;
1271
1272         /* 2-D array to sort mbufs for transmission on each slave into */
1273         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1274         /* Number of mbufs for transmission on each slave */
1275         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1276         /* Mapping array generated by hash function to map mbufs to slaves */
1277         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1278
1279         uint16_t slave_tx_count;
1280         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1281
1282         uint16_t i;
1283
1284         if (unlikely(nb_bufs == 0))
1285                 return 0;
1286
1287         /* Copy slave list to protect against slave up/down changes during tx
1288          * bursting */
1289         slave_count = internals->active_slave_count;
1290         if (unlikely(slave_count < 1))
1291                 return 0;
1292
1293         memcpy(slave_port_ids, internals->active_slaves,
1294                         sizeof(slave_port_ids[0]) * slave_count);
1295
1296         dist_slave_count = 0;
1297         for (i = 0; i < slave_count; i++) {
1298                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1299
1300                 if (ACTOR_STATE(port, DISTRIBUTING))
1301                         dist_slave_port_ids[dist_slave_count++] =
1302                                         slave_port_ids[i];
1303         }
1304
1305         if (likely(dist_slave_count > 1)) {
1306
1307                 /*
1308                  * Populate slaves mbuf with the packets which are to be sent
1309                  * on it, selecting output slave using hash based on xmit policy
1310                  */
1311                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1312                                 bufs_slave_port_idxs);
1313
1314                 for (i = 0; i < nb_bufs; i++) {
1315                         /*
1316                          * Populate slave mbuf arrays with mbufs for that
1317                          * slave
1318                          */
1319                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1320
1321                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1322                                         bufs[i];
1323                 }
1324
1325
1326                 /* Send packet burst on each slave device */
1327                 for (i = 0; i < dist_slave_count; i++) {
1328                         if (slave_nb_bufs[i] == 0)
1329                                 continue;
1330
1331                         slave_tx_count = rte_eth_tx_burst(
1332                                         dist_slave_port_ids[i],
1333                                         bd_tx_q->queue_id, slave_bufs[i],
1334                                         slave_nb_bufs[i]);
1335
1336                         total_tx_count += slave_tx_count;
1337
1338                         /* If tx burst fails move packets to end of bufs */
1339                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1340                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1341                                                 slave_tx_count;
1342                                 total_tx_fail_count += slave_tx_fail_count;
1343
1344                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1345                                        &slave_bufs[i][slave_tx_count],
1346                                        slave_tx_fail_count * sizeof(bufs[0]));
1347                         }
1348                 }
1349         }
1350
1351         /* Check for LACP control packets and send if available */
1352         for (i = 0; i < slave_count; i++) {
1353                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1354                 struct rte_mbuf *ctrl_pkt = NULL;
1355
1356                 if (likely(rte_ring_empty(port->tx_ring)))
1357                         continue;
1358
1359                 if (rte_ring_dequeue(port->tx_ring,
1360                                      (void **)&ctrl_pkt) != -ENOENT) {
1361                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1362                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1363                         /*
1364                          * re-enqueue LAG control plane packets to buffering
1365                          * ring if transmission fails so the packet isn't lost.
1366                          */
1367                         if (slave_tx_count != 1)
1368                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1369                 }
1370         }
1371
1372         return total_tx_count;
1373 }
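/*
 * Illustrative usage sketch (not part of the driver): because packets that
 * could not be transmitted are compacted to the tail of bufs, a caller can
 * retry just the unsent remainder. Assuming hypothetical, application-defined
 * bond_port_id, pkts and nb_pkts:
 *
 *     uint16_t sent = rte_eth_tx_burst(bond_port_id, 0, pkts, nb_pkts);
 *     while (sent < nb_pkts)
 *             sent += rte_eth_tx_burst(bond_port_id, 0,
 *                             pkts + sent, nb_pkts - sent);
 */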
1374
1375 static uint16_t
1376 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1377                 uint16_t nb_pkts)
1378 {
1379         struct bond_dev_private *internals;
1380         struct bond_tx_queue *bd_tx_q;
1381
1382         uint8_t tx_failed_flag = 0, num_of_slaves;
1383         uint16_t slaves[RTE_MAX_ETHPORTS];
1384
1385         uint16_t max_nb_of_tx_pkts = 0;
1386
1387         int slave_tx_total[RTE_MAX_ETHPORTS];
1388         int i, most_successful_tx_slave = -1;
1389
1390         bd_tx_q = (struct bond_tx_queue *)queue;
1391         internals = bd_tx_q->dev_private;
1392
1393         /* Copy slave list to protect against slave up/down changes during tx
1394          * bursting */
1395         num_of_slaves = internals->active_slave_count;
1396         memcpy(slaves, internals->active_slaves,
1397                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1398
1399         if (num_of_slaves < 1)
1400                 return 0;
1401
1402         /* Increment reference count on mbufs */
1403         for (i = 0; i < nb_pkts; i++)
1404                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1405
1406         /* Transmit burst on each active slave */
1407         for (i = 0; i < num_of_slaves; i++) {
1408                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1409                                         bufs, nb_pkts);
1410
1411                 if (unlikely(slave_tx_total[i] < nb_pkts))
1412                         tx_failed_flag = 1;
1413
1414                 /* record the value and slave index for the slave which transmits the
1415                  * maximum number of packets */
1416                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1417                         max_nb_of_tx_pkts = slave_tx_total[i];
1418                         most_successful_tx_slave = i;
1419                 }
1420         }
1421
1422         /* If any slave fails to transmit its full burst, the calling application
1423          * is not expected to know about the extra mbuf references, so we must
1424          * free the failed packets of every slave except the most successful one
1425          */
1426         if (unlikely(tx_failed_flag))
1427                 for (i = 0; i < num_of_slaves; i++)
1428                         if (i != most_successful_tx_slave)
1429                                 while (slave_tx_total[i] < nb_pkts)
1430                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1431
1432         return max_nb_of_tx_pkts;
1433 }
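/*
 * Illustrative note on the reference counting above: with N active slaves
 * each mbuf is sent N times, so its refcnt is raised by N - 1 before the
 * bursts, and every transmission (or explicit free on failure) releases one
 * reference. E.g. with 3 slaves a packet starts at refcnt 1, is raised to 3,
 * and is only returned to the mempool after all 3 references are freed.
 */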
1434
1435 void
1436 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1437 {
1438         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1439
1440         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1441                 /**
1442                  * If in mode 4 then save the link properties of the first
1443                  * slave; all subsequent slaves must match these properties
1444                  */
1445                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1446
1447                 bond_link->link_autoneg = slave_link->link_autoneg;
1448                 bond_link->link_duplex = slave_link->link_duplex;
1449                 bond_link->link_speed = slave_link->link_speed;
1450         } else {
1451                 /**
1452                  * In any other mode the link properties are set to default
1453                  * values of AUTONEG/DUPLEX
1454                  */
1455                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1456                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1457         }
1458 }
1459
1460 int
1461 link_properties_valid(struct rte_eth_dev *ethdev,
1462                 struct rte_eth_link *slave_link)
1463 {
1464         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1465
1466         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1467                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1468
1469                 if (bond_link->link_duplex != slave_link->link_duplex ||
1470                         bond_link->link_autoneg != slave_link->link_autoneg ||
1471                         bond_link->link_speed != slave_link->link_speed)
1472                         return -1;
1473         }
1474
1475         return 0;
1476 }
1477
1478 int
1479 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1480 {
1481         struct ether_addr *mac_addr;
1482
1483         if (eth_dev == NULL) {
1484                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1485                 return -1;
1486         }
1487
1488         if (dst_mac_addr == NULL) {
1489                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1490                 return -1;
1491         }
1492
1493         mac_addr = eth_dev->data->mac_addrs;
1494
1495         ether_addr_copy(mac_addr, dst_mac_addr);
1496         return 0;
1497 }
1498
1499 int
1500 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1501 {
1502         struct ether_addr *mac_addr;
1503
1504         if (eth_dev == NULL) {
1505                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1506                 return -1;
1507         }
1508
1509         if (new_mac_addr == NULL) {
1510                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1511                 return -1;
1512         }
1513
1514         mac_addr = eth_dev->data->mac_addrs;
1515
1516         /* If new MAC is different from current MAC then update */
1517         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1518                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1519
1520         return 0;
1521 }
1522
1523 static const struct ether_addr null_mac_addr;
1524
1525 /*
1526  * Add additional MAC addresses to the slave
1527  */
1528 int
1529 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1530                 uint16_t slave_port_id)
1531 {
1532         int i, ret;
1533         struct ether_addr *mac_addr;
1534
1535         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1536                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1537                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1538                         break;
1539
1540                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1541                 if (ret < 0) {
1542                         /* rollback */
1543                         for (i--; i > 0; i--)
1544                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1545                                         &bonded_eth_dev->data->mac_addrs[i]);
1546                         return ret;
1547                 }
1548         }
1549
1550         return 0;
1551 }
1552
1553 /*
1554  * Remove additional MAC addresses from the slave
1555  */
1556 int
1557 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1558                 uint16_t slave_port_id)
1559 {
1560         int i, rc, ret;
1561         struct ether_addr *mac_addr;
1562
1563         rc = 0;
1564         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1565                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1566                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1567                         break;
1568
1569                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1570                 /* save only the first error */
1571                 if (ret < 0 && rc == 0)
1572                         rc = ret;
1573         }
1574
1575         return rc;
1576 }
1577
1578 int
1579 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1580 {
1581         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1582         int i;
1583
1584         /* Update slave devices MAC addresses */
1585         if (internals->slave_count < 1)
1586                 return -1;
1587
1588         switch (internals->mode) {
1589         case BONDING_MODE_ROUND_ROBIN:
1590         case BONDING_MODE_BALANCE:
1591         case BONDING_MODE_BROADCAST:
1592                 for (i = 0; i < internals->slave_count; i++) {
1593                         if (rte_eth_dev_default_mac_addr_set(
1594                                         internals->slaves[i].port_id,
1595                                         bonded_eth_dev->data->mac_addrs)) {
1596                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1597                                                 internals->slaves[i].port_id);
1598                                 return -1;
1599                         }
1600                 }
1601                 break;
1602         case BONDING_MODE_8023AD:
1603                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1604                 break;
1605         case BONDING_MODE_ACTIVE_BACKUP:
1606         case BONDING_MODE_TLB:
1607         case BONDING_MODE_ALB:
1608         default:
1609                 for (i = 0; i < internals->slave_count; i++) {
1610                         if (internals->slaves[i].port_id ==
1611                                         internals->current_primary_port) {
1612                                 if (rte_eth_dev_default_mac_addr_set(
1613                                                 internals->primary_port,
1614                                                 bonded_eth_dev->data->mac_addrs)) {
1615                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1616                                                         internals->current_primary_port);
1617                                         return -1;
1618                                 }
1619                         } else {
1620                                 if (rte_eth_dev_default_mac_addr_set(
1621                                                 internals->slaves[i].port_id,
1622                                                 &internals->slaves[i].persisted_mac_addr)) {
1623                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1624                                                         internals->slaves[i].port_id);
1625                                         return -1;
1626                                 }
1627                         }
1628                 }
1629         }
1630
1631         return 0;
1632 }
1633
1634 int
1635 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1636 {
1637         struct bond_dev_private *internals;
1638
1639         internals = eth_dev->data->dev_private;
1640
1641         switch (mode) {
1642         case BONDING_MODE_ROUND_ROBIN:
1643                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1644                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1645                 break;
1646         case BONDING_MODE_ACTIVE_BACKUP:
1647                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1648                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1649                 break;
1650         case BONDING_MODE_BALANCE:
1651                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1652                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1653                 break;
1654         case BONDING_MODE_BROADCAST:
1655                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1656                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1657                 break;
1658         case BONDING_MODE_8023AD:
1659                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1660                         return -1;
1661
1662                 if (internals->mode4.dedicated_queues.enabled == 0) {
1663                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1664                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1665                         RTE_BOND_LOG(WARNING,
1666                                 "Using mode 4, it is necessary to do TX burst "
1667                                 "and RX burst at least every 100ms.");
1668                 } else {
1669                         /* Use flow director's optimization */
1670                         eth_dev->rx_pkt_burst =
1671                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1672                         eth_dev->tx_pkt_burst =
1673                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1674                 }
1675                 break;
1676         case BONDING_MODE_TLB:
1677                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1678                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1679                 break;
1680         case BONDING_MODE_ALB:
1681                 if (bond_mode_alb_enable(eth_dev) != 0)
1682                         return -1;
1683
1684                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1685                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1686                 break;
1687         default:
1688                 return -1;
1689         }
1690
1691         internals->mode = mode;
1692
1693         return 0;
1694 }
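/*
 * Illustrative usage sketch (application side, using the public bonding API;
 * the port ids are hypothetical):
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *     rte_eth_bond_slave_add(bond_port, slave0_port_id);
 *     rte_eth_bond_slave_add(bond_port, slave1_port_id);
 *     rte_eth_bond_mode_set(bond_port, BONDING_MODE_8023AD);
 *
 * rte_eth_bond_mode_set() ends up in bond_ethdev_mode_set() above, which
 * installs the rx/tx burst handlers matching the selected mode.
 */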
1695
1696
1697 static int
1698 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1699                 struct rte_eth_dev *slave_eth_dev)
1700 {
1701         int errval = 0;
1702         struct bond_dev_private *internals = (struct bond_dev_private *)
1703                 bonded_eth_dev->data->dev_private;
1704         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1705
1706         if (port->slow_pool == NULL) {
1707                 char mem_name[256];
1708                 int slave_id = slave_eth_dev->data->port_id;
1709
1710                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1711                                 slave_id);
1712                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1713                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1714                         slave_eth_dev->data->numa_node);
1715
1716                 /* Any memory allocation failure in initialization is critical because
1717                  * resources can't be freed, so reinitialization is impossible. */
1718                 if (port->slow_pool == NULL) {
1719                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1720                                 slave_id, mem_name, rte_strerror(rte_errno));
1721                 }
1722         }
1723
1724         if (internals->mode4.dedicated_queues.enabled == 1) {
1725                 /* Configure slow Rx queue */
1726
1727                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1728                                 internals->mode4.dedicated_queues.rx_qid, 128,
1729                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1730                                 NULL, port->slow_pool);
1731                 if (errval != 0) {
1732                         RTE_BOND_LOG(ERR,
1733                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1734                                         slave_eth_dev->data->port_id,
1735                                         internals->mode4.dedicated_queues.rx_qid,
1736                                         errval);
1737                         return errval;
1738                 }
1739
1740                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1741                                 internals->mode4.dedicated_queues.tx_qid, 512,
1742                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1743                                 NULL);
1744                 if (errval != 0) {
1745                         RTE_BOND_LOG(ERR,
1746                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1747                                 slave_eth_dev->data->port_id,
1748                                 internals->mode4.dedicated_queues.tx_qid,
1749                                 errval);
1750                         return errval;
1751                 }
1752         }
1753         return 0;
1754 }
1755
1756 int
1757 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1758                 struct rte_eth_dev *slave_eth_dev)
1759 {
1760         struct bond_rx_queue *bd_rx_q;
1761         struct bond_tx_queue *bd_tx_q;
1762         uint16_t nb_rx_queues;
1763         uint16_t nb_tx_queues;
1764
1765         int errval;
1766         uint16_t q_id;
1767         struct rte_flow_error flow_error;
1768
1769         struct bond_dev_private *internals = (struct bond_dev_private *)
1770                 bonded_eth_dev->data->dev_private;
1771
1772         /* Stop slave */
1773         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1774
1775         /* Enable interrupts on slave device if supported */
1776         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1777                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1778
1779         /* If RSS is enabled for bonding, try to enable it for slaves  */
1780         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1781                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1782                                 != 0) {
1783                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1784                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1785                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1786                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1787                 } else {
1788                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1789                 }
1790
1791                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1792                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1793                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1794                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1795         }
1796
1797         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1798                         DEV_RX_OFFLOAD_VLAN_FILTER)
1799                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1800                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1801         else
1802                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1803                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1804
1805         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1806         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1807
1808         if (internals->mode == BONDING_MODE_8023AD) {
1809                 if (internals->mode4.dedicated_queues.enabled == 1) {
1810                         nb_rx_queues++;
1811                         nb_tx_queues++;
1812                 }
1813         }
1814
1815         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1816                                      bonded_eth_dev->data->mtu);
1817         if (errval != 0 && errval != -ENOTSUP) {
1818                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1819                                 slave_eth_dev->data->port_id, errval);
1820                 return errval;
1821         }
1822
1823         /* Configure device */
1824         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1825                         nb_rx_queues, nb_tx_queues,
1826                         &(slave_eth_dev->data->dev_conf));
1827         if (errval != 0) {
1828                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1829                                 slave_eth_dev->data->port_id, errval);
1830                 return errval;
1831         }
1832
1833         /* Setup Rx Queues */
1834         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1835                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1836
1837                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1838                                 bd_rx_q->nb_rx_desc,
1839                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1840                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1841                 if (errval != 0) {
1842                         RTE_BOND_LOG(ERR,
1843                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1844                                         slave_eth_dev->data->port_id, q_id, errval);
1845                         return errval;
1846                 }
1847         }
1848
1849         /* Setup Tx Queues */
1850         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1851                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1852
1853                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1854                                 bd_tx_q->nb_tx_desc,
1855                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1856                                 &bd_tx_q->tx_conf);
1857                 if (errval != 0) {
1858                         RTE_BOND_LOG(ERR,
1859                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1860                                 slave_eth_dev->data->port_id, q_id, errval);
1861                         return errval;
1862                 }
1863         }
1864
1865         if (internals->mode == BONDING_MODE_8023AD &&
1866                         internals->mode4.dedicated_queues.enabled == 1) {
1867                 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1868                                 != 0)
1869                         return -1;
1870
1871                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1872                                 slave_eth_dev->data->port_id) != 0) {
1873                         RTE_BOND_LOG(ERR,
1874                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1875                                 slave_eth_dev->data->port_id);
1876                         return -1;
1877                 }
1878
1879                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1880                         rte_flow_destroy(slave_eth_dev->data->port_id,
1881                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1882                                         &flow_error);
1883
1884                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1885                                 slave_eth_dev->data->port_id);
1886         }
1887
1888         /* Start device */
1889         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1890         if (errval != 0) {
1891                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1892                                 slave_eth_dev->data->port_id, errval);
1893                 return -1;
1894         }
1895
1896         /* If RSS is enabled for bonding, synchronize RETA */
1897         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1898                 int i;
1899                 struct bond_dev_private *internals;
1900
1901                 internals = bonded_eth_dev->data->dev_private;
1902
1903                 for (i = 0; i < internals->slave_count; i++) {
1904                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1905                                 errval = rte_eth_dev_rss_reta_update(
1906                                                 slave_eth_dev->data->port_id,
1907                                                 &internals->reta_conf[0],
1908                                                 internals->slaves[i].reta_size);
1909                                 if (errval != 0) {
1910                                         RTE_BOND_LOG(WARNING,
1911                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1912                                                      " RSS Configuration for bonding may be inconsistent.",
1913                                                      slave_eth_dev->data->port_id, errval);
1914                                 }
1915                                 break;
1916                         }
1917                 }
1918         }
1919
1920         /* If lsc interrupt is set, check initial slave's link status */
1921         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1922                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1923                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1924                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1925                         NULL);
1926         }
1927
1928         return 0;
1929 }
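/*
 * Illustrative sketch of the RSS propagation handled by slave_configure():
 * a hypothetical application configuration such as
 *
 *     struct rte_eth_conf conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *     };
 *     conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP;
 *     rte_eth_dev_configure(bond_port_id, nb_rxq, nb_txq, &conf);
 *
 * (bond_port_id, nb_rxq and nb_txq are application-defined) is mirrored to
 * every slave: the same rss_hf, optional rss_key and mq_mode are copied into
 * the slave's dev_conf before it is configured and started.
 */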
1930
1931 void
1932 slave_remove(struct bond_dev_private *internals,
1933                 struct rte_eth_dev *slave_eth_dev)
1934 {
1935         uint8_t i;
1936
1937         for (i = 0; i < internals->slave_count; i++)
1938                 if (internals->slaves[i].port_id ==
1939                                 slave_eth_dev->data->port_id)
1940                         break;
1941
1942         if (i < (internals->slave_count - 1)) {
1943                 struct rte_flow *flow;
1944
1945                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1946                                 sizeof(internals->slaves[0]) *
1947                                 (internals->slave_count - i - 1));
1948                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1949                         memmove(&flow->flows[i], &flow->flows[i + 1],
1950                                 sizeof(flow->flows[0]) *
1951                                 (internals->slave_count - i - 1));
1952                         flow->flows[internals->slave_count - 1] = NULL;
1953                 }
1954         }
1955
1956         internals->slave_count--;
1957
1958         /* force reconfiguration of slave interfaces */
1959         _rte_eth_dev_reset(slave_eth_dev);
1960 }
1961
1962 static void
1963 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1964
1965 void
1966 slave_add(struct bond_dev_private *internals,
1967                 struct rte_eth_dev *slave_eth_dev)
1968 {
1969         struct bond_slave_details *slave_details =
1970                         &internals->slaves[internals->slave_count];
1971
1972         slave_details->port_id = slave_eth_dev->data->port_id;
1973         slave_details->last_link_status = 0;
1974
1975         /* Mark slave devices that don't support interrupts so we can
1976          * compensate when we start the bond
1977          */
1978         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1979                 slave_details->link_status_poll_enabled = 1;
1980         }
1981
1982         slave_details->link_status_wait_to_complete = 0;
1983         /* save the slave's current MAC so it can be restored on removal */
1984         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1985                         sizeof(struct ether_addr));
1986 }
1987
1988 void
1989 bond_ethdev_primary_set(struct bond_dev_private *internals,
1990                 uint16_t slave_port_id)
1991 {
1992         int i;
1993
1994         if (internals->active_slave_count < 1)
1995                 internals->current_primary_port = slave_port_id;
1996         else
1997                 /* Search bonded device slave ports for new proposed primary port */
1998                 for (i = 0; i < internals->active_slave_count; i++) {
1999                         if (internals->active_slaves[i] == slave_port_id)
2000                                 internals->current_primary_port = slave_port_id;
2001                 }
2002 }
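/*
 * Illustrative note: applications normally select the primary through the
 * public API, e.g. (hypothetical port ids)
 *
 *     rte_eth_bond_primary_set(bond_port_id, slave_port_id);
 *
 * which validates the arguments and then records the choice here for the
 * active-backup/TLB/ALB transmit paths.
 */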
2003
2004 static void
2005 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2006
2007 static int
2008 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2009 {
2010         struct bond_dev_private *internals;
2011         int i;
2012
2013         /* slave eth dev will be started by bonded device */
2014         if (check_for_bonded_ethdev(eth_dev)) {
2015                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2016                                 eth_dev->data->port_id);
2017                 return -1;
2018         }
2019
2020         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2021         eth_dev->data->dev_started = 1;
2022
2023         internals = eth_dev->data->dev_private;
2024
2025         if (internals->slave_count == 0) {
2026                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2027                 goto out_err;
2028         }
2029
2030         if (internals->user_defined_mac == 0) {
2031                 struct ether_addr *new_mac_addr = NULL;
2032
2033                 for (i = 0; i < internals->slave_count; i++)
2034                         if (internals->slaves[i].port_id == internals->primary_port)
2035                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2036
2037                 if (new_mac_addr == NULL)
2038                         goto out_err;
2039
2040                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2041                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2042                                         eth_dev->data->port_id);
2043                         goto out_err;
2044                 }
2045         }
2046
2047         /* If bonded device is configured in promiscuous mode then re-apply config */
2048         if (internals->promiscuous_en)
2049                 bond_ethdev_promiscuous_enable(eth_dev);
2050
2051         if (internals->mode == BONDING_MODE_8023AD) {
2052                 if (internals->mode4.dedicated_queues.enabled == 1) {
2053                         internals->mode4.dedicated_queues.rx_qid =
2054                                         eth_dev->data->nb_rx_queues;
2055                         internals->mode4.dedicated_queues.tx_qid =
2056                                         eth_dev->data->nb_tx_queues;
2057                 }
2058         }
2059
2060
2061         /* Reconfigure each slave device if starting bonded device */
2062         for (i = 0; i < internals->slave_count; i++) {
2063                 struct rte_eth_dev *slave_ethdev =
2064                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2065                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2066                         RTE_BOND_LOG(ERR,
2067                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2068                                 eth_dev->data->port_id,
2069                                 internals->slaves[i].port_id);
2070                         goto out_err;
2071                 }
2072                 /* We will need to poll for link status if any slave doesn't
2073                  * support interrupts
2074                  */
2075                 if (internals->slaves[i].link_status_poll_enabled)
2076                         internals->link_status_polling_enabled = 1;
2077         }
2078
2079         /* start polling if needed */
2080         if (internals->link_status_polling_enabled) {
2081                 rte_eal_alarm_set(
2082                         internals->link_status_polling_interval_ms * 1000,
2083                         bond_ethdev_slave_link_status_change_monitor,
2084                         (void *)&rte_eth_devices[internals->port_id]);
2085         }
2086
2087         /* Update all slave devices MACs*/
2088         if (mac_address_slaves_update(eth_dev) != 0)
2089                 goto out_err;
2090
2091         if (internals->user_defined_primary_port)
2092                 bond_ethdev_primary_set(internals, internals->primary_port);
2093
2094         if (internals->mode == BONDING_MODE_8023AD)
2095                 bond_mode_8023ad_start(eth_dev);
2096
2097         if (internals->mode == BONDING_MODE_TLB ||
2098                         internals->mode == BONDING_MODE_ALB)
2099                 bond_tlb_enable(internals);
2100
2101         return 0;
2102
2103 out_err:
2104         eth_dev->data->dev_started = 0;
2105         return -1;
2106 }
2107
2108 static void
2109 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2110 {
2111         uint8_t i;
2112
2113         if (dev->data->rx_queues != NULL) {
2114                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2115                         rte_free(dev->data->rx_queues[i]);
2116                         dev->data->rx_queues[i] = NULL;
2117                 }
2118                 dev->data->nb_rx_queues = 0;
2119         }
2120
2121         if (dev->data->tx_queues != NULL) {
2122                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2123                         rte_free(dev->data->tx_queues[i]);
2124                         dev->data->tx_queues[i] = NULL;
2125                 }
2126                 dev->data->nb_tx_queues = 0;
2127         }
2128 }
2129
2130 void
2131 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2132 {
2133         struct bond_dev_private *internals = eth_dev->data->dev_private;
2134         uint8_t i;
2135
2136         if (internals->mode == BONDING_MODE_8023AD) {
2137                 struct port *port;
2138                 void *pkt = NULL;
2139
2140                 bond_mode_8023ad_stop(eth_dev);
2141
2142                 /* Discard all messages to/from mode 4 state machines */
2143                 for (i = 0; i < internals->active_slave_count; i++) {
2144                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2145
2146                         RTE_ASSERT(port->rx_ring != NULL);
2147                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2148                                 rte_pktmbuf_free(pkt);
2149
2150                         RTE_ASSERT(port->tx_ring != NULL);
2151                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2152                                 rte_pktmbuf_free(pkt);
2153                 }
2154         }
2155
2156         if (internals->mode == BONDING_MODE_TLB ||
2157                         internals->mode == BONDING_MODE_ALB) {
2158                 bond_tlb_disable(internals);
2159                 for (i = 0; i < internals->active_slave_count; i++)
2160                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2161         }
2162
2163         internals->link_status_polling_enabled = 0;
2164         for (i = 0; i < internals->slave_count; i++)
2165                 internals->slaves[i].last_link_status = 0;
2166
2167         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2168         eth_dev->data->dev_started = 0;
2169 }
2170
2171 void
2172 bond_ethdev_close(struct rte_eth_dev *dev)
2173 {
2174         struct bond_dev_private *internals = dev->data->dev_private;
2175         uint8_t bond_port_id = internals->port_id;
2176         int skipped = 0;
2177         struct rte_flow_error ferror;
2178
2179         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2180         while (internals->slave_count != skipped) {
2181                 uint16_t port_id = internals->slaves[skipped].port_id;
2182
2183                 rte_eth_dev_stop(port_id);
2184
2185                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2186                         RTE_BOND_LOG(ERR,
2187                                      "Failed to remove port %d from bonded device %s",
2188                                      port_id, dev->device->name);
2189                         skipped++;
2190                 }
2191         }
2192         bond_flow_ops.flush(dev, &ferror);
2193         bond_ethdev_free_queues(dev);
2194         rte_bitmap_reset(internals->vlan_filter_bmp);
2195 }
2196
2197 /* forward declaration */
2198 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2199
2200 static void
2201 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2202 {
2203         struct bond_dev_private *internals = dev->data->dev_private;
2204
2205         uint16_t max_nb_rx_queues = UINT16_MAX;
2206         uint16_t max_nb_tx_queues = UINT16_MAX;
2207
2208         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2209
2210         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2211                         internals->candidate_max_rx_pktlen :
2212                         ETHER_MAX_JUMBO_FRAME_LEN;
2213
2214         /* Max number of tx/rx queues that the bonded device can support is
2215          * the minimum reported across its slaves, as all slaves must be
2216          * capable of supporting the same number of tx/rx queues.
2217          */
2218         if (internals->slave_count > 0) {
2219                 struct rte_eth_dev_info slave_info;
2220                 uint8_t idx;
2221
2222                 for (idx = 0; idx < internals->slave_count; idx++) {
2223                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2224                                         &slave_info);
2225
2226                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2227                                 max_nb_rx_queues = slave_info.max_rx_queues;
2228
2229                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2230                                 max_nb_tx_queues = slave_info.max_tx_queues;
2231                 }
2232         }
2233
2234         dev_info->max_rx_queues = max_nb_rx_queues;
2235         dev_info->max_tx_queues = max_nb_tx_queues;
2236
2237         /**
2238          * If dedicated hw queues are enabled for the bonding device in LACP
2239          * mode then the maximum number of data path queues is reduced by 1.
2240          */
2241         if (internals->mode == BONDING_MODE_8023AD &&
2242                 internals->mode4.dedicated_queues.enabled == 1) {
2243                 dev_info->max_rx_queues--;
2244                 dev_info->max_tx_queues--;
2245         }
2246
2247         dev_info->min_rx_bufsize = 0;
2248
2249         dev_info->rx_offload_capa = internals->rx_offload_capa;
2250         dev_info->tx_offload_capa = internals->tx_offload_capa;
2251         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2252         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2253         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2254
2255         dev_info->reta_size = internals->reta_size;
2256 }
2257
2258 static int
2259 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2260 {
2261         int res;
2262         uint16_t i;
2263         struct bond_dev_private *internals = dev->data->dev_private;
2264
2265         /* don't do this while a slave is being added */
2266         rte_spinlock_lock(&internals->lock);
2267
2268         if (on)
2269                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2270         else
2271                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2272
2273         for (i = 0; i < internals->slave_count; i++) {
2274                 uint16_t port_id = internals->slaves[i].port_id;
2275
2276                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2277                 if (res == -ENOTSUP)
2278                         RTE_BOND_LOG(WARNING,
2279                                      "Setting VLAN filter on slave port %u not supported.",
2280                                      port_id);
2281         }
2282
2283         rte_spinlock_unlock(&internals->lock);
2284         return 0;
2285 }
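/*
 * Illustrative sketch: a VLAN filter set on the bonded port with the generic
 * ethdev call fans out to every slave above, e.g. (hypothetical port id and
 * VLAN id)
 *
 *     rte_eth_dev_vlan_filter(bond_port_id, 100, 1);
 *
 * This typically requires the port to have been configured with the
 * DEV_RX_OFFLOAD_VLAN_FILTER rx offload.
 */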
2286
2287 static int
2288 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2289                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2290                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2291 {
2292         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2293                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2294                                         0, dev->data->numa_node);
2295         if (bd_rx_q == NULL)
2296                 return -1;
2297
2298         bd_rx_q->queue_id = rx_queue_id;
2299         bd_rx_q->dev_private = dev->data->dev_private;
2300
2301         bd_rx_q->nb_rx_desc = nb_rx_desc;
2302
2303         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2304         bd_rx_q->mb_pool = mb_pool;
2305
2306         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2307
2308         return 0;
2309 }
2310
2311 static int
2312 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2313                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2314                 const struct rte_eth_txconf *tx_conf)
2315 {
2316         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2317                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2318                                         0, dev->data->numa_node);
2319
2320         if (bd_tx_q == NULL)
2321                 return -1;
2322
2323         bd_tx_q->queue_id = tx_queue_id;
2324         bd_tx_q->dev_private = dev->data->dev_private;
2325
2326         bd_tx_q->nb_tx_desc = nb_tx_desc;
2327         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2328
2329         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2330
2331         return 0;
2332 }
2333
2334 static void
2335 bond_ethdev_rx_queue_release(void *queue)
2336 {
2337         if (queue == NULL)
2338                 return;
2339
2340         rte_free(queue);
2341 }
2342
2343 static void
2344 bond_ethdev_tx_queue_release(void *queue)
2345 {
2346         if (queue == NULL)
2347                 return;
2348
2349         rte_free(queue);
2350 }
2351
2352 static void
2353 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2354 {
2355         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2356         struct bond_dev_private *internals;
2357
2358         /* Default value for polling slave found is true as we don't want to
2359          * disable the polling thread if we cannot get the lock */
2360         int i, polling_slave_found = 1;
2361
2362         if (cb_arg == NULL)
2363                 return;
2364
2365         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2366         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2367
2368         if (!bonded_ethdev->data->dev_started ||
2369                 !internals->link_status_polling_enabled)
2370                 return;
2371
2372         /* If the device is currently being configured then don't check slave
2373          * link status; wait until the next period */
2374         if (rte_spinlock_trylock(&internals->lock)) {
2375                 if (internals->slave_count > 0)
2376                         polling_slave_found = 0;
2377
2378                 for (i = 0; i < internals->slave_count; i++) {
2379                         if (!internals->slaves[i].link_status_poll_enabled)
2380                                 continue;
2381
2382                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2383                         polling_slave_found = 1;
2384
2385                         /* Update slave link status */
2386                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2387                                         internals->slaves[i].link_status_wait_to_complete);
2388
2389                         /* if link status has changed since last checked then call lsc
2390                          * event callback */
2391                         if (slave_ethdev->data->dev_link.link_status !=
2392                                         internals->slaves[i].last_link_status) {
2393                                 internals->slaves[i].last_link_status =
2394                                                 slave_ethdev->data->dev_link.link_status;
2395
2396                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2397                                                 RTE_ETH_EVENT_INTR_LSC,
2398                                                 &bonded_ethdev->data->port_id,
2399                                                 NULL);
2400                         }
2401                 }
2402                 rte_spinlock_unlock(&internals->lock);
2403         }
2404
2405         if (polling_slave_found)
2406                 /* Set alarm to continue monitoring link status of slave ethdevs */
2407                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2408                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2409 }
2410
2411 static int
2412 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2413 {
2414         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2415
2416         struct bond_dev_private *bond_ctx;
2417         struct rte_eth_link slave_link;
2418
2419         uint32_t idx;
2420
2421         bond_ctx = ethdev->data->dev_private;
2422
2423         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2424
2425         if (ethdev->data->dev_started == 0 ||
2426                         bond_ctx->active_slave_count == 0) {
2427                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2428                 return 0;
2429         }
2430
2431         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2432
2433         if (wait_to_complete)
2434                 link_update = rte_eth_link_get;
2435         else
2436                 link_update = rte_eth_link_get_nowait;
2437
2438         switch (bond_ctx->mode) {
2439         case BONDING_MODE_BROADCAST:
2440                 /**
2441                  * Setting link speed to UINT32_MAX to ensure we pick up the
2442                  * value of the first active slave
2443                  */
2444                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2445
2446                 /**
2447                  * link speed is the minimum of all the slaves' link speeds,
2448                  * as packet loss will occur on a slave if transmission at a
2449                  * rate greater than its link speed is attempted
2450                  */
2451                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2452                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2453
2454                         if (slave_link.link_speed <
2455                                         ethdev->data->dev_link.link_speed)
2456                                 ethdev->data->dev_link.link_speed =
2457                                                 slave_link.link_speed;
2458                 }
2459                 break;
2460         case BONDING_MODE_ACTIVE_BACKUP:
2461                 /* Current primary slave */
2462                 link_update(bond_ctx->current_primary_port, &slave_link);
2463
2464                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2465                 break;
2466         case BONDING_MODE_8023AD:
2467                 ethdev->data->dev_link.link_autoneg =
2468                                 bond_ctx->mode4.slave_link.link_autoneg;
2469                 ethdev->data->dev_link.link_duplex =
2470                                 bond_ctx->mode4.slave_link.link_duplex;
2471                 /* fall through to update link speed */
2472         case BONDING_MODE_ROUND_ROBIN:
2473         case BONDING_MODE_BALANCE:
2474         case BONDING_MODE_TLB:
2475         case BONDING_MODE_ALB:
2476         default:
2477                 /**
2478                  * In these modes the maximum theoretical link speed is the
2479                  * sum of all the slaves (e.g. two active 10G slaves -> 20G)
2480                  */
2481                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2482
2483                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2484                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2485
2486                         ethdev->data->dev_link.link_speed +=
2487                                         slave_link.link_speed;
2488                 }
2489         }
2490
2491
2492         return 0;
2493 }
2494
2495
2496 static int
2497 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2498 {
2499         struct bond_dev_private *internals = dev->data->dev_private;
2500         struct rte_eth_stats slave_stats;
2501         int i, j;
2502
2503         for (i = 0; i < internals->slave_count; i++) {
2504                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2505
2506                 stats->ipackets += slave_stats.ipackets;
2507                 stats->opackets += slave_stats.opackets;
2508                 stats->ibytes += slave_stats.ibytes;
2509                 stats->obytes += slave_stats.obytes;
2510                 stats->imissed += slave_stats.imissed;
2511                 stats->ierrors += slave_stats.ierrors;
2512                 stats->oerrors += slave_stats.oerrors;
2513                 stats->rx_nombuf += slave_stats.rx_nombuf;
2514
2515                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2516                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2517                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2518                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2519                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2520                         stats->q_errors[j] += slave_stats.q_errors[j];
2521                 }
2522
2523         }
2524
2525         return 0;
2526 }
2527
2528 static void
2529 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2530 {
2531         struct bond_dev_private *internals = dev->data->dev_private;
2532         int i;
2533
2534         for (i = 0; i < internals->slave_count; i++)
2535                 rte_eth_stats_reset(internals->slaves[i].port_id);
2536 }
2537
2538 static void
2539 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2540 {
2541         struct bond_dev_private *internals = eth_dev->data->dev_private;
2542         int i;
2543
2544         internals->promiscuous_en = 1;
2545
2546         switch (internals->mode) {
2547         /* Promiscuous mode is propagated to all slaves */
2548         case BONDING_MODE_ROUND_ROBIN:
2549         case BONDING_MODE_BALANCE:
2550         case BONDING_MODE_BROADCAST:
2551                 for (i = 0; i < internals->slave_count; i++)
2552                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2553                 break;
2554         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2555         case BONDING_MODE_8023AD:
2556                 break;
2557         /* Promiscuous mode is propagated only to primary slave */
2558         case BONDING_MODE_ACTIVE_BACKUP:
2559         case BONDING_MODE_TLB:
2560         case BONDING_MODE_ALB:
2561         default:
2562                 rte_eth_promiscuous_enable(internals->current_primary_port);
2563         }
2564 }
2565
2566 static void
2567 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2568 {
2569         struct bond_dev_private *internals = dev->data->dev_private;
2570         int i;
2571
2572         internals->promiscuous_en = 0;
2573
2574         switch (internals->mode) {
2575         /* Promiscuous mode is propagated to all slaves */
2576         case BONDING_MODE_ROUND_ROBIN:
2577         case BONDING_MODE_BALANCE:
2578         case BONDING_MODE_BROADCAST:
2579                 for (i = 0; i < internals->slave_count; i++)
2580                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2581                 break;
2582         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2583         case BONDING_MODE_8023AD:
2584                 break;
2585         /* Promiscuous mode is propagated only to primary slave */
2586         case BONDING_MODE_ACTIVE_BACKUP:
2587         case BONDING_MODE_TLB:
2588         case BONDING_MODE_ALB:
2589         default:
2590                 rte_eth_promiscuous_disable(internals->current_primary_port);
2591         }
2592 }
2593
2594 static void
2595 bond_ethdev_delayed_lsc_propagation(void *arg)
2596 {
2597         if (arg == NULL)
2598                 return;
2599
2600         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2601                         RTE_ETH_EVENT_INTR_LSC, NULL);
2602 }
2603
2604 int
2605 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2606                 void *param, void *ret_param __rte_unused)
2607 {
2608         struct rte_eth_dev *bonded_eth_dev;
2609         struct bond_dev_private *internals;
2610         struct rte_eth_link link;
2611         int rc = -1;
2612
2613         int i, valid_slave = 0;
2614         uint8_t active_pos;
2615         uint8_t lsc_flag = 0;
2616
2617         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2618                 return rc;
2619
2620         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2621
2622         if (check_for_bonded_ethdev(bonded_eth_dev))
2623                 return rc;
2624
2625         internals = bonded_eth_dev->data->dev_private;
2626
2627         /* If the device isn't started don't handle interrupts */
2628         if (!bonded_eth_dev->data->dev_started)
2629                 return rc;
2630
2631         /* verify that port_id is a valid slave of bonded port */
2632         for (i = 0; i < internals->slave_count; i++) {
2633                 if (internals->slaves[i].port_id == port_id) {
2634                         valid_slave = 1;
2635                         break;
2636                 }
2637         }
2638
2639         if (!valid_slave)
2640                 return rc;
2641
2642         /* Synchronize parallel lsc callback calls, whether from a real link
2643          * event in the slave PMDs or from the bonding PMD itself.
2644          */
2645         rte_spinlock_lock(&internals->lsc_lock);
2646
2647         /* Search for port in active port list */
2648         active_pos = find_slave_by_id(internals->active_slaves,
2649                         internals->active_slave_count, port_id);
2650
2651         rte_eth_link_get_nowait(port_id, &link);
2652         if (link.link_status) {
2653                 if (active_pos < internals->active_slave_count)
2654                         goto link_update;
2655
2656                 /* if no active slave ports then set this port to be primary port */
2657                 if (internals->active_slave_count < 1) {
2658                         /* If first active slave, then change link status */
2659                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2660                         internals->current_primary_port = port_id;
2661                         lsc_flag = 1;
2662
2663                         mac_address_slaves_update(bonded_eth_dev);
2664                 }
2665
2666                 /* check link state properties if bonded link is up*/
2667                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2668                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2669                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2670                                              "for slave %d in bonding mode %d",
2671                                              port_id, internals->mode);
2672                 } else {
2673                         /* inherit slave link properties */
2674                         link_properties_set(bonded_eth_dev, &link);
2675                 }
2676
2677                 activate_slave(bonded_eth_dev, port_id);
2678
2679                 /* If the user has defined the primary port then default to using it */
2680                 if (internals->user_defined_primary_port &&
2681                                 internals->primary_port == port_id)
2682                         bond_ethdev_primary_set(internals, port_id);
2683         } else {
2684                 if (active_pos == internals->active_slave_count)
2685                         goto link_update;
2686
2687                 /* Remove from active slave list */
2688                 deactivate_slave(bonded_eth_dev, port_id);
2689
2690                 if (internals->active_slave_count < 1)
2691                         lsc_flag = 1;
2692
2693                 /* Update primary id, take first active slave from list or if none
2694                  * available fall back to the configured primary port */
2695                 if (port_id == internals->current_primary_port) {
2696                         if (internals->active_slave_count > 0)
2697                                 bond_ethdev_primary_set(internals,
2698                                                 internals->active_slaves[0]);
2699                         else
2700                                 internals->current_primary_port = internals->primary_port;
2701                 }
2702         }
2703
2704 link_update:
2705         /**
2706          * Update bonded device link properties after any change to active
2707          * slaves
2708          */
2709         bond_ethdev_link_update(bonded_eth_dev, 0);
2710
2711         if (lsc_flag) {
2712                 /* Cancel any outstanding delayed LSC propagation alarms if delays are enabled */
2713                 if (internals->link_up_delay_ms > 0 ||
2714                         internals->link_down_delay_ms > 0)
2715                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2716                                         bonded_eth_dev);
2717
2718                 if (bonded_eth_dev->data->dev_link.link_status) {
2719                         if (internals->link_up_delay_ms > 0)
2720                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2721                                                 bond_ethdev_delayed_lsc_propagation,
2722                                                 (void *)bonded_eth_dev);
2723                         else
2724                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2725                                                 RTE_ETH_EVENT_INTR_LSC,
2726                                                 NULL);
2727
2728                 } else {
2729                         if (internals->link_down_delay_ms > 0)
2730                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2731                                                 bond_ethdev_delayed_lsc_propagation,
2732                                                 (void *)bonded_eth_dev);
2733                         else
2734                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2735                                                 RTE_ETH_EVENT_INTR_LSC,
2736                                                 NULL);
2737                 }
2738         }
2739
2740         rte_spinlock_unlock(&internals->lsc_lock);
2741
2742         return 0;
2743 }
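/*
 * Illustrative sketch (assumed wiring, not part of this file): slave link
 * events reach the callback above because the bonding API registers it on
 * every slave port at slave-add time, passing a pointer to the bonded port
 * id as the callback parameter dereferenced at the top of
 * bond_ethdev_lsc_event_callback(), roughly as follows:
 *
 *	rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			bond_ethdev_lsc_event_callback,
 *			&bonded_eth_dev->data->port_id);
 */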
2744
2745 static int
2746 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2747                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2748 {
2749         unsigned i, j;
2750         int result = 0;
2751         int slave_reta_size;
2752         unsigned reta_count;
2753         struct bond_dev_private *internals = dev->data->dev_private;
2754
2755         if (reta_size != internals->reta_size)
2756                 return -EINVAL;
2757
2758         /* Copy RETA table */
2759         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2760
2761         for (i = 0; i < reta_count; i++) {
2762                 internals->reta_conf[i].mask = reta_conf[i].mask;
2763                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2764                         if ((reta_conf[i].mask >> j) & 0x01)
2765                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2766         }
2767
2768         /* Fill the rest of the table by repeating the first block */
2769         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2770                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2771                                 sizeof(internals->reta_conf[0]) * reta_count);
2772
2773         /* Propagate RETA over slaves */
2774         for (i = 0; i < internals->slave_count; i++) {
2775                 slave_reta_size = internals->slaves[i].reta_size;
2776                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2777                                 &internals->reta_conf[0], slave_reta_size);
2778                 if (result < 0)
2779                         return result;
2780         }
2781
2782         return 0;
2783 }
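/*
 * Illustrative usage (hypothetical application code, assuming a bonded port
 * "bond_port_id" that reports a 128-entry RETA): build an evenly distributed
 * table over "nb_rx_queues" queues and push it through the public ethdev
 * API, which lands in the callback above and is then propagated to every
 * slave.
 *
 *	struct rte_eth_rss_reta_entry64 conf[128 / RTE_RETA_GROUP_SIZE];
 *	unsigned int n;
 *
 *	memset(conf, 0, sizeof(conf));
 *	for (n = 0; n < 128; n++) {
 *		conf[n / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
 *		conf[n / RTE_RETA_GROUP_SIZE].reta[n % RTE_RETA_GROUP_SIZE] =
 *				n % nb_rx_queues;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, conf, 128);
 */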
2784
2785 static int
2786 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2787                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2788 {
2789         int i, j;
2790         struct bond_dev_private *internals = dev->data->dev_private;
2791
2792         if (reta_size != internals->reta_size)
2793                 return -EINVAL;
2794
2795         /* Copy RETA table */
2796         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2797                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2798                         if ((reta_conf[i].mask >> j) & 0x01)
2799                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2800
2801         return 0;
2802 }
2803
2804 static int
2805 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2806                 struct rte_eth_rss_conf *rss_conf)
2807 {
2808         int i, result = 0;
2809         struct bond_dev_private *internals = dev->data->dev_private;
2810         struct rte_eth_rss_conf bond_rss_conf;
2811
2812         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2813
2814         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2815
2816         if (bond_rss_conf.rss_hf != 0)
2817                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2818
2819         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2820                         sizeof(internals->rss_key)) {
2821                 if (bond_rss_conf.rss_key_len == 0)
2822                         bond_rss_conf.rss_key_len = 40;
2823                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2824                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2825                                 internals->rss_key_len);
2826         }
2827
2828         for (i = 0; i < internals->slave_count; i++) {
2829                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2830                                 &bond_rss_conf);
2831                 if (result < 0)
2832                         return result;
2833         }
2834
2835         return 0;
2836 }
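/*
 * Illustrative usage (hypothetical application code): restrict the RSS hash
 * to non-fragmented IPv4 TCP/UDP traffic on the bonded port. A NULL rss_key
 * leaves the currently programmed key untouched; hash types that not all
 * slaves support are masked off above before being propagated.
 *
 *	struct rte_eth_rss_conf conf;
 *
 *	memset(&conf, 0, sizeof(conf));
 *	conf.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP;
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 */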
2837
2838 static int
2839 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2840                 struct rte_eth_rss_conf *rss_conf)
2841 {
2842         struct bond_dev_private *internals = dev->data->dev_private;
2843
2844         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2845         rss_conf->rss_key_len = internals->rss_key_len;
2846         if (rss_conf->rss_key)
2847                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2848
2849         return 0;
2850 }
2851
2852 static int
2853 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2854 {
2855         struct rte_eth_dev *slave_eth_dev;
2856         struct bond_dev_private *internals = dev->data->dev_private;
2857         int ret, i;
2858
2859         rte_spinlock_lock(&internals->lock);
2860
2861         for (i = 0; i < internals->slave_count; i++) {
2862                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2863                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2864                         rte_spinlock_unlock(&internals->lock);
2865                         return -ENOTSUP;
2866                 }
2867         }
2868         for (i = 0; i < internals->slave_count; i++) {
2869                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2870                 if (ret < 0) {
2871                         rte_spinlock_unlock(&internals->lock);
2872                         return ret;
2873                 }
2874         }
2875
2876         rte_spinlock_unlock(&internals->lock);
2877         return 0;
2878 }
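/*
 * Illustrative usage (hypothetical application code): a single
 * rte_eth_dev_set_mtu(bond_port_id, 9000) call on the bonded port runs the
 * callback above, which first verifies that every slave implements mtu_set
 * and only then applies the new MTU to each slave in turn.
 */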
2879
2880 static int
2881 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2882 {
2883         if (mac_address_set(dev, addr)) {
2884                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2885                 return -EINVAL;
2886         }
2887
2888         return 0;
2889 }
2890
2891 static int
2892 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2893                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2894 {
2895         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2896                 *(const void **)arg = &bond_flow_ops;
2897                 return 0;
2898         }
2899         return -ENOTSUP;
2900 }
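/*
 * Note: returning bond_flow_ops for RTE_ETH_FILTER_GENERIC is how the
 * rte_flow API discovers this driver's flow callbacks; an application's
 * rte_flow_validate()/rte_flow_create() calls on the bonded port are routed
 * through the ops table exposed here.
 */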
2901
2902 static int
2903 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2904                                 __rte_unused uint32_t index, uint32_t vmdq)
2905 {
2906         struct rte_eth_dev *slave_eth_dev;
2907         struct bond_dev_private *internals = dev->data->dev_private;
2908         int ret, i;
2909
2910         rte_spinlock_lock(&internals->lock);
2911
2912         for (i = 0; i < internals->slave_count; i++) {
2913                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2914                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2915                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2916                         ret = -ENOTSUP;
2917                         goto end;
2918                 }
2919         }
2920
2921         for (i = 0; i < internals->slave_count; i++) {
2922                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2923                                 mac_addr, vmdq);
2924                 if (ret < 0) {
2925                         /* rollback */
2926                         for (i--; i >= 0; i--)
2927                                 rte_eth_dev_mac_addr_remove(
2928                                         internals->slaves[i].port_id, mac_addr);
2929                         goto end;
2930                 }
2931         }
2932
2933         ret = 0;
2934 end:
2935         rte_spinlock_unlock(&internals->lock);
2936         return ret;
2937 }
2938
2939 static void
2940 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2941 {
2942         struct rte_eth_dev *slave_eth_dev;
2943         struct bond_dev_private *internals = dev->data->dev_private;
2944         int i;
2945
2946         rte_spinlock_lock(&internals->lock);
2947
2948         for (i = 0; i < internals->slave_count; i++) {
2949                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2950                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2951                         goto end;
2952         }
2953
2954         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2955
2956         for (i = 0; i < internals->slave_count; i++)
2957                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2958                                 mac_addr);
2959
2960 end:
2961         rte_spinlock_unlock(&internals->lock);
2962 }
2963
2964 const struct eth_dev_ops default_dev_ops = {
2965         .dev_start            = bond_ethdev_start,
2966         .dev_stop             = bond_ethdev_stop,
2967         .dev_close            = bond_ethdev_close,
2968         .dev_configure        = bond_ethdev_configure,
2969         .dev_infos_get        = bond_ethdev_info,
2970         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2971         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2972         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2973         .rx_queue_release     = bond_ethdev_rx_queue_release,
2974         .tx_queue_release     = bond_ethdev_tx_queue_release,
2975         .link_update          = bond_ethdev_link_update,
2976         .stats_get            = bond_ethdev_stats_get,
2977         .stats_reset          = bond_ethdev_stats_reset,
2978         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2979         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2980         .reta_update          = bond_ethdev_rss_reta_update,
2981         .reta_query           = bond_ethdev_rss_reta_query,
2982         .rss_hash_update      = bond_ethdev_rss_hash_update,
2983         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2984         .mtu_set              = bond_ethdev_mtu_set,
2985         .mac_addr_set         = bond_ethdev_mac_address_set,
2986         .mac_addr_add         = bond_ethdev_mac_addr_add,
2987         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
2988         .filter_ctrl          = bond_filter_ctrl
2989 };
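/*
 * Note: this ops table is shared by every bonded ethdev instance;
 * per-device state lives in struct bond_dev_private, which bond_alloc()
 * below allocates alongside the ethdev itself.
 */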
2990
2991 static int
2992 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2993 {
2994         const char *name = rte_vdev_device_name(dev);
2995         uint8_t socket_id = dev->device.numa_node;
2996         struct bond_dev_private *internals = NULL;
2997         struct rte_eth_dev *eth_dev = NULL;
2998         uint32_t vlan_filter_bmp_size;
2999
3000         /* now do all data allocation - for the eth_dev structure and the
3001          * internal (private) data
3002          */
3003
3004         /* reserve an ethdev entry */
3005         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3006         if (eth_dev == NULL) {
3007                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3008                 goto err;
3009         }
3010
3011         internals = eth_dev->data->dev_private;
3012         eth_dev->data->nb_rx_queues = (uint16_t)1;
3013         eth_dev->data->nb_tx_queues = (uint16_t)1;
3014
3015         /* Allocate memory for storing MAC addresses */
3016         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3017                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3018         if (eth_dev->data->mac_addrs == NULL) {
3019                 RTE_BOND_LOG(ERR,
3020                              "Failed to allocate %u bytes needed to store MAC addresses",
3021                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3022                 goto err;
3023         }
3024
3025         eth_dev->dev_ops = &default_dev_ops;
3026         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3027
3028         rte_spinlock_init(&internals->lock);
3029         rte_spinlock_init(&internals->lsc_lock);
3030
3031         internals->port_id = eth_dev->data->port_id;
3032         internals->mode = BONDING_MODE_INVALID;
3033         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3034         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3035         internals->burst_xmit_hash = burst_xmit_l2_hash;
3036         internals->user_defined_mac = 0;
3037
3038         internals->link_status_polling_enabled = 0;
3039
3040         internals->link_status_polling_interval_ms =
3041                 DEFAULT_POLLING_INTERVAL_10_MS;
3042         internals->link_down_delay_ms = 0;
3043         internals->link_up_delay_ms = 0;
3044
3045         internals->slave_count = 0;
3046         internals->active_slave_count = 0;
3047         internals->rx_offload_capa = 0;
3048         internals->tx_offload_capa = 0;
3049         internals->rx_queue_offload_capa = 0;
3050         internals->tx_queue_offload_capa = 0;
3051         internals->candidate_max_rx_pktlen = 0;
3052         internals->max_rx_pktlen = 0;
3053
3054         /* Initially allow any RSS flow type to be chosen */
3055         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3056
3057         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3058         memset(internals->slaves, 0, sizeof(internals->slaves));
3059
3060         TAILQ_INIT(&internals->flow_list);
3061         internals->flow_isolated_valid = 0;
3062
3063         /* Set mode 4 default configuration */
3064         bond_mode_8023ad_setup(eth_dev, NULL);
3065         if (bond_ethdev_mode_set(eth_dev, mode)) {
3066                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3067                                  eth_dev->data->port_id, mode);
3068                 goto err;
3069         }
3070
3071         vlan_filter_bmp_size =
3072                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3073         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3074                                                    RTE_CACHE_LINE_SIZE);
3075         if (internals->vlan_filter_bmpmem == NULL) {
3076                 RTE_BOND_LOG(ERR,
3077                              "Failed to allocate vlan bitmap for bonded device %u",
3078                              eth_dev->data->port_id);
3079                 goto err;
3080         }
3081
3082         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3083                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3084         if (internals->vlan_filter_bmp == NULL) {
3085                 RTE_BOND_LOG(ERR,
3086                              "Failed to init vlan bitmap for bonded device %u",
3087                              eth_dev->data->port_id);
3088                 rte_free(internals->vlan_filter_bmpmem);
3089                 goto err;
3090         }
3091
3092         return eth_dev->data->port_id;
3093
3094 err:
3095         rte_free(internals);
3096         if (eth_dev != NULL) {
3097                 rte_free(eth_dev->data->mac_addrs);
3098                 rte_eth_dev_release_port(eth_dev);
3099         }
3100         return -1;
3101 }
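/*
 * Illustrative sketch (assumed call path): applications normally reach
 * bond_alloc() indirectly, either through the devargs handled by
 * bond_probe() below or through the bonding API, e.g.:
 *
 *	int port_id = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *
 * rte_eth_bond_create() builds the equivalent vdev argument string and
 * triggers this probe/alloc sequence via the vdev bus.
 */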
3102
3103 static int
3104 bond_probe(struct rte_vdev_device *dev)
3105 {
3106         const char *name;
3107         struct bond_dev_private *internals;
3108         struct rte_kvargs *kvlist;
3109         uint8_t bonding_mode, socket_id;
3110         int arg_count, port_id;
3111         uint8_t agg_mode;
3112         struct rte_eth_dev *eth_dev;
3113
3114         if (!dev)
3115                 return -EINVAL;
3116
3117         name = rte_vdev_device_name(dev);
3118         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3119
3120         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3121             strlen(rte_vdev_device_args(dev)) == 0) {
3122                 eth_dev = rte_eth_dev_attach_secondary(name);
3123                 if (!eth_dev) {
3124                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3125                         return -1;
3126                 }
3127                 /* TODO: request info from primary to set up Rx and Tx */
3128                 eth_dev->dev_ops = &default_dev_ops;
3129                 eth_dev->device = &dev->device;
3130                 rte_eth_dev_probing_finish(eth_dev);
3131                 return 0;
3132         }
3133
3134         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3135                 pmd_bond_init_valid_arguments);
3136         if (kvlist == NULL)
3137                 return -1;
3138
3139         /* Parse link bonding mode */
3140         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3141                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3142                                 &bond_ethdev_parse_slave_mode_kvarg,
3143                                 &bonding_mode) != 0) {
3144                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3145                                         name);
3146                         goto parse_error;
3147                 }
3148         } else {
3149                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3150                                 "device %s", name);
3151                 goto parse_error;
3152         }
3153
3154         /* Parse socket id to create bonding device on */
3155         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3156         if (arg_count == 1) {
3157                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3158                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3159                                 != 0) {
3160                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3161                                         "bonded device %s", name);
3162                         goto parse_error;
3163                 }
3164         } else if (arg_count > 1) {
3165                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3166                                 "bonded device %s", name);
3167                 goto parse_error;
3168         } else {
3169                 socket_id = rte_socket_id();
3170         }
3171
3172         dev->device.numa_node = socket_id;
3173
3174         /* Create link bonding eth device */
3175         port_id = bond_alloc(dev, bonding_mode);
3176         if (port_id < 0) {
3177                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3178                                 "socket %u.", name, bonding_mode, socket_id);
3179                 goto parse_error;
3180         }
3181         internals = rte_eth_devices[port_id].data->dev_private;
3182         internals->kvlist = kvlist;
3183
3184         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3185
3186         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3187                 if (rte_kvargs_process(kvlist,
3188                                 PMD_BOND_AGG_MODE_KVARG,
3189                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3190                                 &agg_mode) != 0) {
3191                         RTE_BOND_LOG(ERR,
3192                                         "Failed to parse agg selection mode for bonded device %s",
3193                                         name);
3194                         goto parse_error;
3195                 }
3196
3197                 if (internals->mode == BONDING_MODE_8023AD)
3198                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3199                                         agg_mode);
3200         } else {
3201                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3202         }
3203
3204         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3205                         "socket %u.", name, port_id, bonding_mode, socket_id);
3206         return 0;
3207
3208 parse_error:
3209         rte_kvargs_free(kvlist);
3210
3211         return -1;
3212 }
3213
3214 static int
3215 bond_remove(struct rte_vdev_device *dev)
3216 {
3217         struct rte_eth_dev *eth_dev;
3218         struct bond_dev_private *internals;
3219         const char *name;
3220
3221         if (!dev)
3222                 return -EINVAL;
3223
3224         name = rte_vdev_device_name(dev);
3225         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3226
3227         /* now free all data allocations - for the eth_dev structure and the
3228          * internal (private) data
3229          */
3230
3231         /* find an ethdev entry */
3232         eth_dev = rte_eth_dev_allocated(name);
3233         if (eth_dev == NULL)
3234                 return -ENODEV;
3235
3236         RTE_ASSERT(eth_dev->device == &dev->device);
3237
3238         internals = eth_dev->data->dev_private;
3239         if (internals->slave_count != 0)
3240                 return -EBUSY;
3241
3242         if (eth_dev->data->dev_started == 1) {
3243                 bond_ethdev_stop(eth_dev);
3244                 bond_ethdev_close(eth_dev);
3245         }
3246
3247         eth_dev->dev_ops = NULL;
3248         eth_dev->rx_pkt_burst = NULL;
3249         eth_dev->tx_pkt_burst = NULL;
3250
3251         internals = eth_dev->data->dev_private;
3252         /* Try to release the mempool used in mode 6. If the bonded device
3253          * is not in mode 6, freeing a NULL pointer is not a problem.
3254          */
3255         rte_mempool_free(internals->mode6.mempool);
3256         rte_bitmap_free(internals->vlan_filter_bmp);
3257         rte_free(internals->vlan_filter_bmpmem);
3258         rte_free(eth_dev->data->dev_private);
3259         rte_free(eth_dev->data->mac_addrs);
3260
3261         rte_eth_dev_release_port(eth_dev);
3262
3263         return 0;
3264 }
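/*
 * Illustrative teardown (hypothetical device name): after all slaves have
 * been removed with rte_eth_bond_slave_remove(), the bonded vdev can be
 * unplugged, which invokes bond_remove() above:
 *
 *	rte_eal_hotplug_remove("vdev", "net_bonding0");
 */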
3265
3266 /* this part will resolve the slave port ids after all the other pdevs and
3267  * vdevs have been allocated */
3268 static int
3269 bond_ethdev_configure(struct rte_eth_dev *dev)
3270 {
3271         const char *name = dev->device->name;
3272         struct bond_dev_private *internals = dev->data->dev_private;
3273         struct rte_kvargs *kvlist = internals->kvlist;
3274         int arg_count;
3275         uint16_t port_id = dev - rte_eth_devices;
3276         uint8_t agg_mode;
3277
3278         static const uint8_t default_rss_key[40] = {
3279                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3280                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3281                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3282                 0xBE, 0xAC, 0x01, 0xFA
3283         };
3284
3285         unsigned i, j;
3286
3287         /* If RSS is enabled, fill table and key with default values */
3288         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3289                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3290                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3291                 memcpy(internals->rss_key, default_rss_key, sizeof(default_rss_key));
3292
3293                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3294                         internals->reta_conf[i].mask = ~0LL;
3295                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3296                                 internals->reta_conf[i].reta[j] =
3297                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3298                                                 dev->data->nb_rx_queues;
3299                 }
3300         }
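/*
 * Worked example of the default distribution above: with 4 Rx queues and
 * RTE_RETA_GROUP_SIZE == 64, entry n of every 64-entry group maps to queue
 * n % 4, i.e. 0, 1, 2, 3, 0, 1, 2, 3, ..., so each queue owns an equal
 * share of the indirection table instead of queue 0 receiving all traffic.
 */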
3301
3302         /* set the max_rx_pktlen */
3303         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3304
3305         /*
3306          * If there is no kvlist, this bonded device was created through the
3307          * bonding API rather than from devargs.
3308          */
3309         if (!kvlist)
3310                 return 0;
3311
3312         /* Parse MAC address for bonded device */
3313         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3314         if (arg_count == 1) {
3315                 struct ether_addr bond_mac;
3316
3317                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3318                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3319                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3320                                      name);
3321                         return -1;
3322                 }
3323
3324                 /* Set MAC address */
3325                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3326                         RTE_BOND_LOG(ERR,
3327                                      "Failed to set mac address on bonded device %s",
3328                                      name);
3329                         return -1;
3330                 }
3331         } else if (arg_count > 1) {
3332                 RTE_BOND_LOG(ERR,
3333                              "MAC address can be specified only once for bonded device %s",
3334                              name);
3335                 return -1;
3336         }
3337
3338         /* Parse/set balance mode transmit policy */
3339         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3340         if (arg_count == 1) {
3341                 uint8_t xmit_policy;
3342
3343                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3344                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3345                     0) {
3346                         RTE_BOND_LOG(INFO,
3347                                      "Invalid xmit policy specified for bonded device %s",
3348                                      name);
3349                         return -1;
3350                 }
3351
3352                 /* Set balance mode transmit policy */
3353                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3354                         RTE_BOND_LOG(ERR,
3355                                      "Failed to set balance xmit policy on bonded device %s",
3356                                      name);
3357                         return -1;
3358                 }
3359         } else if (arg_count > 1) {
3360                 RTE_BOND_LOG(ERR,
3361                              "Transmit policy can be specified only once for bonded device %s",
3362                              name);
3363                 return -1;
3364         }
3365
3366         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3367                 if (rte_kvargs_process(kvlist,
3368                                        PMD_BOND_AGG_MODE_KVARG,
3369                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3370                                        &agg_mode) != 0) {
3371                         RTE_BOND_LOG(ERR,
3372                                      "Failed to parse agg selection mode for bonded device %s",
3373                                      name);
3374                 }
3375                 if (internals->mode == BONDING_MODE_8023AD)
3376                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3377                                                               agg_mode);
3378         }
3379
3380         /* Parse/add slave ports to bonded device */
3381         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3382                 struct bond_ethdev_slave_ports slave_ports;
3383                 unsigned i;
3384
3385                 memset(&slave_ports, 0, sizeof(slave_ports));
3386
3387                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3388                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3389                         RTE_BOND_LOG(ERR,
3390                                      "Failed to parse slave ports for bonded device %s",
3391                                      name);
3392                         return -1;
3393                 }
3394
3395                 for (i = 0; i < slave_ports.slave_count; i++) {
3396                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3397                                 RTE_BOND_LOG(ERR,
3398                                              "Failed to add port %d as slave to bonded device %s",
3399                                              slave_ports.slaves[i], name);
3400                         }
3401                 }
3402
3403         } else {
3404                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3405                 return -1;
3406         }
3407
3408         /* Parse/set primary slave port id */
3409         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3410         if (arg_count == 1) {
3411                 uint16_t primary_slave_port_id;
3412
3413                 if (rte_kvargs_process(kvlist,
3414                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3415                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3416                                        &primary_slave_port_id) < 0) {
3417                         RTE_BOND_LOG(INFO,
3418                                      "Invalid primary slave port id specified for bonded device %s",
3419                                      name);
3420                         return -1;
3421                 }
3422
3423                 /* Set the primary slave port id */
3424                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3425                     != 0) {
3426                         RTE_BOND_LOG(ERR,
3427                                      "Failed to set primary slave port %d on bonded device %s",
3428                                      primary_slave_port_id, name);
3429                         return -1;
3430                 }
3431         } else if (arg_count > 1) {
3432                 RTE_BOND_LOG(INFO,
3433                              "Primary slave can be specified only once for bonded device %s",
3434                              name);
3435                 return -1;
3436         }
3437
3438         /* Parse link status monitor polling interval */
3439         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3440         if (arg_count == 1) {
3441                 uint32_t lsc_poll_interval_ms;
3442
3443                 if (rte_kvargs_process(kvlist,
3444                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3445                                        &bond_ethdev_parse_time_ms_kvarg,
3446                                        &lsc_poll_interval_ms) < 0) {
3447                         RTE_BOND_LOG(INFO,
3448                                      "Invalid lsc polling interval value specified for bonded"
3449                                      " device %s", name);
3450                         return -1;
3451                 }
3452
3453                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3454                     != 0) {
3455                         RTE_BOND_LOG(ERR,
3456                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3457                                      lsc_poll_interval_ms, name);
3458                         return -1;
3459                 }
3460         } else if (arg_count > 1) {
3461                 RTE_BOND_LOG(INFO,
3462                              "LSC polling interval can be specified only once for bonded"
3463                              " device %s", name);
3464                 return -1;
3465         }
3466
3467         /* Parse link up interrupt propagation delay */
3468         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3469         if (arg_count == 1) {
3470                 uint32_t link_up_delay_ms;
3471
3472                 if (rte_kvargs_process(kvlist,
3473                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3474                                        &bond_ethdev_parse_time_ms_kvarg,
3475                                        &link_up_delay_ms) < 0) {
3476                         RTE_BOND_LOG(INFO,
3477                                      "Invalid link up propagation delay value specified for"
3478                                      " bonded device %s", name);
3479                         return -1;
3480                 }
3481
3482                 /* Set the link up propagation delay */
3483                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3484                     != 0) {
3485                         RTE_BOND_LOG(ERR,
3486                                      "Failed to set link up propagation delay (%u ms) on bonded"
3487                                      " device %s", link_up_delay_ms, name);
3488                         return -1;
3489                 }
3490         } else if (arg_count > 1) {
3491                 RTE_BOND_LOG(INFO,
3492                              "Link up propagation delay can be specified only once for"
3493                              " bonded device %s", name);
3494                 return -1;
3495         }
3496
3497         /* Parse link down interrupt propagation delay */
3498         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3499         if (arg_count == 1) {
3500                 uint32_t link_down_delay_ms;
3501
3502                 if (rte_kvargs_process(kvlist,
3503                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3504                                        &bond_ethdev_parse_time_ms_kvarg,
3505                                        &link_down_delay_ms) < 0) {
3506                         RTE_BOND_LOG(INFO,
3507                                      "Invalid link down propagation delay value specified for"
3508                                      " bonded device %s", name);
3509                         return -1;
3510                 }
3511
3512                 /* Set the link down propagation delay */
3513                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3514                     != 0) {
3515                         RTE_BOND_LOG(ERR,
3516                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3517                                      link_down_delay_ms, name);
3518                         return -1;
3519                 }
3520         } else if (arg_count > 1) {
3521                 RTE_BOND_LOG(INFO,
3522                              "Link down propagation delay can be specified only once for bonded device %s",
3523                              name);
3524                 return -1;
3525         }
3526
3527         return 0;
3528 }
3529
3530 struct rte_vdev_driver pmd_bond_drv = {
3531         .probe = bond_probe,
3532         .remove = bond_remove,
3533 };
3534
3535 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3536 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3537
3538 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3539         "slave=<ifc> "
3540         "primary=<ifc> "
3541         "mode=[0-6] "
3542         "xmit_policy=[l2 | l23 | l34] "
3543         "agg_mode=[count | stable | bandwidth] "
3544         "socket_id=<int> "
3545         "mac=<mac addr> "
3546         "lsc_poll_period_ms=<int> "
3547         "up_delay=<int> "
3548         "down_delay=<int>");
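/*
 * Illustrative EAL invocation (hypothetical PCI addresses): create an
 * active-backup (mode 1) bonded device over two slave ports:
 *
 *	--vdev 'net_bonding0,mode=1,slave=0000:04:00.0,slave=0000:05:00.0'
 */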
3549
3550 int bond_logtype;
3551
3552 RTE_INIT(bond_init_log)
3553 {
3554         bond_logtype = rte_log_register("pmd.net.bond");
3555         if (bond_logtype >= 0)
3556                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3557 }