net/bonding: stop and deactivate slaves on stop
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
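/*
 * Return the size in bytes of up to two stacked VLAN headers following the
 * Ethernet header, updating *proto to the encapsulated EtherType.
 */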
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41                 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
42                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
43
44                 vlan_offset = sizeof(struct vlan_hdr);
45                 *proto = vlan_hdr->eth_proto;
46
47                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48                         vlan_hdr = vlan_hdr + 1;
49                         *proto = vlan_hdr->eth_proto;
50                         vlan_offset += sizeof(struct vlan_hdr);
51                 }
52         }
53         return vlan_offset;
54 }
55
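/*
 * Poll each active slave in turn, starting at the remembered active_slave
 * index, until the burst is filled; the starting index is advanced on every
 * call so polling load is spread across the slaves.
 */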
56 static uint16_t
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
58 {
59         struct bond_dev_private *internals;
60
61         uint16_t num_rx_total = 0;
62         uint16_t slave_count;
63         uint16_t active_slave;
64         int i;
65
66         /* Cast to the structure containing the bonded device's port id and queue id */
67         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
68         internals = bd_rx_q->dev_private;
69         slave_count = internals->active_slave_count;
70         active_slave = internals->active_slave;
        if (active_slave >= slave_count)
                active_slave = 0;
71
72         for (i = 0; i < slave_count && nb_pkts; i++) {
73                 uint16_t num_rx_slave;
74
75                 /* Offset of pointer to *bufs increases as packets are received
76                  * from other slaves */
77                 num_rx_slave =
78                         rte_eth_rx_burst(internals->active_slaves[active_slave],
79                                          bd_rx_q->queue_id,
80                                          bufs + num_rx_total, nb_pkts);
81                 num_rx_total += num_rx_slave;
82                 nb_pkts -= num_rx_slave;
83                 if (++active_slave == slave_count)
84                         active_slave = 0;
85         }
86
87         if (++internals->active_slave == slave_count)
88                 internals->active_slave = 0;
89         return num_rx_total;
90 }
91
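/*
 * Active-backup RX: packets are only ever received on the current primary
 * slave.
 */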
92 static uint16_t
93 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *internals;
97
98         /* Cast to the structure containing the bonded device's port id and queue id */
99         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
100
101         internals = bd_rx_q->dev_private;
102
103         return rte_eth_rx_burst(internals->current_primary_port,
104                         bd_rx_q->queue_id, bufs, nb_pkts);
105 }
106
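/*
 * Return non-zero for untagged slow-protocol frames (LACP and marker PDUs),
 * which must be handed to the mode 4 state machine instead of the application.
 */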
107 static inline uint8_t
108 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
109 {
110         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
111
112         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
113                 (ethertype == ether_type_slow_be &&
114                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
115 }
116
117 /*****************************************************************************
118  * Flow director's setup for mode 4 optimization
119  */
120
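/*
 * The flow rule built from the items and attributes below matches ingress
 * frames whose EtherType is ETHER_TYPE_SLOW (0x8809), i.e. LACP/marker PDUs,
 * so they can be steered to a dedicated receive queue.
 */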
121 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
122         .dst.addr_bytes = { 0 },
123         .src.addr_bytes = { 0 },
124         .type = RTE_BE16(ETHER_TYPE_SLOW),
125 };
126
127 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
128         .dst.addr_bytes = { 0 },
129         .src.addr_bytes = { 0 },
130         .type = 0xFFFF,
131 };
132
133 static struct rte_flow_item flow_item_8023ad[] = {
134         {
135                 .type = RTE_FLOW_ITEM_TYPE_ETH,
136                 .spec = &flow_item_eth_type_8023ad,
137                 .last = NULL,
138                 .mask = &flow_item_eth_mask_type_8023ad,
139         },
140         {
141                 .type = RTE_FLOW_ITEM_TYPE_END,
142                 .spec = NULL,
143                 .last = NULL,
144                 .mask = NULL,
145         }
146 };
147
148 const struct rte_flow_attr flow_attr_8023ad = {
149         .group = 0,
150         .priority = 0,
151         .ingress = 1,
152         .egress = 0,
153         .reserved = 0,
154 };
155
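/*
 * Check that a slave would accept the LACP steering flow rule and that it
 * has room for the extra dedicated RX/TX queue pair on top of the bonded
 * device's queues.
 */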
156 int
157 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
158                 uint16_t slave_port) {
159         struct rte_eth_dev_info slave_info;
160         struct rte_flow_error error;
161         struct bond_dev_private *internals = (struct bond_dev_private *)
162                         (bond_dev->data->dev_private);
163
164         const struct rte_flow_action_queue lacp_queue_conf = {
165                 .index = 0,
166         };
167
168         const struct rte_flow_action actions[] = {
169                 {
170                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
171                         .conf = &lacp_queue_conf
172                 },
173                 {
174                         .type = RTE_FLOW_ACTION_TYPE_END,
175                 }
176         };
177
178         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
179                         flow_item_8023ad, actions, &error);
180         if (ret < 0) {
181                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
182                                 __func__, error.message, slave_port,
183                                 internals->mode4.dedicated_queues.rx_qid);
184                 return -1;
185         }
186
187         rte_eth_dev_info_get(slave_port, &slave_info);
188         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
189                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
190                 RTE_BOND_LOG(ERR,
191                         "%s: Slave %d capabilities don't allow allocating additional queues",
192                         __func__, slave_port);
193                 return -1;
194         }
195
196         return 0;
197 }
198
199 int
200 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
201         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
202         struct bond_dev_private *internals = (struct bond_dev_private *)
203                         (bond_dev->data->dev_private);
204         struct rte_eth_dev_info bond_info;
205         uint16_t idx;
206
207         /* Verify that all slaves in the bond support flow director */
208         if (internals->slave_count > 0) {
209                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
210
211                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
212                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
213
214                 for (idx = 0; idx < internals->slave_count; idx++) {
215                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
216                                         internals->slaves[idx].port_id) != 0)
217                                 return -1;
218                 }
219         }
220
221         return 0;
222 }
223
224 int
225 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
226
227         struct rte_flow_error error;
228         struct bond_dev_private *internals = (struct bond_dev_private *)
229                         (bond_dev->data->dev_private);
230
231         struct rte_flow_action_queue lacp_queue_conf = {
232                 .index = internals->mode4.dedicated_queues.rx_qid,
233         };
234
235         const struct rte_flow_action actions[] = {
236                 {
237                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
238                         .conf = &lacp_queue_conf
239                 },
240                 {
241                         .type = RTE_FLOW_ACTION_TYPE_END,
242                 }
243         };
244
245         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
246                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
247         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
248                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
249                                 "(slave_port=%d queue_id=%d)",
250                                 error.message, slave_port,
251                                 internals->mode4.dedicated_queues.rx_qid);
252                 return -1;
253         }
254
255         return 0;
256 }
257
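/*
 * Mode 4 RX with dedicated queues: slow frames are already steered to their
 * own queue by the flow rule, so the data queues can be polled round-robin
 * without per-packet inspection.
 */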
258 static uint16_t
259 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
260                 uint16_t nb_pkts)
261 {
262         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
263         struct bond_dev_private *internals = bd_rx_q->dev_private;
264         uint16_t num_rx_total = 0;      /* Total number of received packets */
265         uint16_t slaves[RTE_MAX_ETHPORTS];
266         uint16_t slave_count;
267         uint16_t active_slave;
268         uint16_t i;
269
270         /* Copy slave list to protect against slave up/down changes during
271          * rx bursting */
272         slave_count = internals->active_slave_count;
273         active_slave = internals->active_slave;
        if (active_slave >= slave_count)
                active_slave = 0;
274         memcpy(slaves, internals->active_slaves,
275                         sizeof(internals->active_slaves[0]) * slave_count);
276
277         for (i = 0; i < slave_count && nb_pkts; i++) {
278                 uint16_t num_rx_slave;
279
280                 /* Read packets from this slave */
281                 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
282                                                 bd_rx_q->queue_id,
283                                                 bufs + num_rx_total, nb_pkts);
284                 num_rx_total += num_rx_slave;
285                 nb_pkts -= num_rx_slave;
286
287                 if (++active_slave == slave_count)
288                         active_slave = 0;
289         }
290
291         if (++internals->active_slave == slave_count)
292                 internals->active_slave = 0;
293
294         return num_rx_total;
295 }
296
297 static uint16_t
298 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
299                 uint16_t nb_bufs)
300 {
301         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
302         struct bond_dev_private *internals = bd_tx_q->dev_private;
303
304         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
305         uint16_t slave_count;
306
307         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
308         uint16_t dist_slave_count;
309
310         /* Per-slave 2-D array into which mbufs are sorted for transmission */
311         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
312         /* Number of mbufs for transmission on each slave */
313         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
314         /* Mapping array generated by hash function to map mbufs to slaves */
315         uint16_t bufs_slave_port_idxs[nb_bufs];
316
317         uint16_t slave_tx_count;
318         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
319
320         uint16_t i;
321
322         if (unlikely(nb_bufs == 0))
323                 return 0;
324
325         /* Copy slave list to protect against slave up/down changes during tx
326          * bursting */
327         slave_count = internals->active_slave_count;
328         if (unlikely(slave_count < 1))
329                 return 0;
330
331         memcpy(slave_port_ids, internals->active_slaves,
332                         sizeof(slave_port_ids[0]) * slave_count);
333
334
335         dist_slave_count = 0;
336         for (i = 0; i < slave_count; i++) {
337                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
338
339                 if (ACTOR_STATE(port, DISTRIBUTING))
340                         dist_slave_port_ids[dist_slave_count++] =
341                                         slave_port_ids[i];
342         }
343
344         if (unlikely(dist_slave_count < 1))
345                 return 0;
346
347         /*
348          * Populate each slave's mbuf array with the packets to be sent on
349          * it, selecting the output slave via the xmit-policy hash
350          */
351         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
352                         bufs_slave_port_idxs);
353
354         for (i = 0; i < nb_bufs; i++) {
355                 /* Populate slave mbuf arrays with mbufs for that slave. */
356                 uint8_t slave_idx = bufs_slave_port_idxs[i];
357
358                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
359         }
360
361
362         /* Send packet burst on each slave device */
363         for (i = 0; i < dist_slave_count; i++) {
364                 if (slave_nb_bufs[i] == 0)
365                         continue;
366
367                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
368                                 bd_tx_q->queue_id, slave_bufs[i],
369                                 slave_nb_bufs[i]);
370
371                 total_tx_count += slave_tx_count;
372
373                 /* If tx burst fails move packets to end of bufs */
374                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
375                         int slave_tx_fail_count = slave_nb_bufs[i] -
376                                         slave_tx_count;
377                         total_tx_fail_count += slave_tx_fail_count;
378                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
379                                &slave_bufs[i][slave_tx_count],
380                                slave_tx_fail_count * sizeof(bufs[0]));
381                 }
382         }
383
384         return total_tx_count;
385 }
386
387
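/*
 * Mode 4 RX on shared queues: every received packet is inspected so that
 * LACP/marker frames can be diverted to the control path and packets from
 * slaves that are not collecting (or not addressed to us) can be dropped.
 */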
388 static uint16_t
389 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
390                 uint16_t nb_pkts)
391 {
392         /* Cast to the structure containing the bonded device's port id and queue id */
393         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
394         struct bond_dev_private *internals = bd_rx_q->dev_private;
395         struct ether_addr bond_mac;
396
397         struct ether_hdr *hdr;
398
399         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
400         uint16_t num_rx_total = 0;      /* Total number of received packets */
401         uint16_t slaves[RTE_MAX_ETHPORTS];
402         uint16_t slave_count, idx;
403
404         uint8_t collecting;  /* current slave collecting status */
405         const uint8_t promisc = internals->promiscuous_en;
406         uint8_t i, j, k;
407         uint8_t subtype;
408
409         rte_eth_macaddr_get(internals->port_id, &bond_mac);
410         /* Copy slave list to protect against slave up/down changes during
411          * rx bursting */
412         slave_count = internals->active_slave_count;
413         memcpy(slaves, internals->active_slaves,
414                         sizeof(internals->active_slaves[0]) * slave_count);
415
416         idx = internals->active_slave;
417         if (idx >= slave_count) {
418                 internals->active_slave = 0;
419                 idx = 0;
420         }
421         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
422                 j = num_rx_total;
423                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
424                                          COLLECTING);
425
426                 /* Read packets from this slave */
427                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
428                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
429
430                 for (k = j; k < 2 && k < num_rx_total; k++)
431                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
432
433                 /* Handle slow protocol packets. */
434                 while (j < num_rx_total) {
435
436                         /* A known non-L2 packet type cannot be a slow frame; skip it */
437                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
438                                 j++;
439                                 continue;
440                         }
441
442                         if (j + 3 < num_rx_total)
443                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
444
445                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
446                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
447
448                         /* Remove the packet from the array if it is a slow packet,
449                          * the slave is not collecting, or the bond is not promiscuous
450                          * and the destination address does not match. */
451                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
452                                 !collecting || (!promisc &&
453                                         !is_multicast_ether_addr(&hdr->d_addr) &&
454                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
455
456                                 if (hdr->ether_type == ether_type_slow_be) {
457                                         bond_mode_8023ad_handle_slow_pkt(
458                                             internals, slaves[idx], bufs[j]);
459                                 } else
460                                         rte_pktmbuf_free(bufs[j]);
461
462                                 /* Packet is managed by mode 4 or dropped, shift the array */
463                                 num_rx_total--;
464                                 if (j < num_rx_total) {
465                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
466                                                 (num_rx_total - j));
467                                 }
468                         } else
469                                 j++;
470                 }
471                 if (unlikely(++idx == slave_count))
472                         idx = 0;
473         }
474
475         if (++internals->active_slave == slave_count)
476                 internals->active_slave = 0;
477
478         return num_rx_total;
479 }
480
481 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
482 uint32_t burstnumberRX;
483 uint32_t burstnumberTX;
484
485 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
486
487 static void
488 arp_op_name(uint16_t arp_op, char *buf)
489 {
490         switch (arp_op) {
491         case ARP_OP_REQUEST:
492                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
493                 return;
494         case ARP_OP_REPLY:
495                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
496                 return;
497         case ARP_OP_REVREQUEST:
498                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
499                                 "Reverse ARP Request");
500                 return;
501         case ARP_OP_REVREPLY:
502                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
503                                 "Reverse ARP Reply");
504                 return;
505         case ARP_OP_INVREQUEST:
506                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
507                                 "Peer Identify Request");
508                 return;
509         case ARP_OP_INVREPLY:
510                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
511                                 "Peer Identify Reply");
512                 return;
513         default:
514                 break;
515         }
516         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
517         return;
518 }
519 #endif
520 #define MaxIPv4String   16
521 static void
522 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
523 {
524         uint32_t ipv4_addr;
525
526         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
527         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
528                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
529                 ipv4_addr & 0xFF);
530 }
531
532 #define MAX_CLIENTS_NUMBER      128
533 uint8_t active_clients;
534 struct client_stats_t {
535         uint16_t port;
536         uint32_t ipv4_addr;
537         uint32_t ipv4_rx_packets;
538         uint32_t ipv4_tx_packets;
539 };
540 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
541
542 static void
543 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
544 {
545         int i = 0;
546
547         for (; i < MAX_CLIENTS_NUMBER; i++) {
548                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
549                         /* Just update RX packets number for this client */
550                         if (TXorRXindicator == &burstnumberRX)
551                                 client_stats[i].ipv4_rx_packets++;
552                         else
553                                 client_stats[i].ipv4_tx_packets++;
554                         return;
555                 }
556         }
557         /* We have a new client. Insert it into the table and update its stats */
        if (unlikely(active_clients >= MAX_CLIENTS_NUMBER))
                return;
558         if (TXorRXindicator == &burstnumberRX)
559                 client_stats[active_clients].ipv4_rx_packets++;
560         else
561                 client_stats[active_clients].ipv4_tx_packets++;
562         client_stats[active_clients].ipv4_addr = addr;
563         client_stats[active_clients].port = port;
564         active_clients++;
565
566 }
567
568 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
569 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
570         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
571                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
572                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
573                 info,                                                   \
574                 port,                                                   \
575                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
576                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
577                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
578                 src_ip,                                                 \
579                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
580                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
581                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
582                 dst_ip,                                                 \
583                 arp_op, ++burstnumber)
584 #endif
585
586 static void
587 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
588                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
589 {
590         struct ipv4_hdr *ipv4_h;
591 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
592         struct arp_hdr *arp_h;
593         char dst_ip[16];
594         char ArpOp[24];
595         char buf[16];
596 #endif
597         char src_ip[16];
598
599         uint16_t ether_type = eth_h->ether_type;
600         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
601
602 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
603         strlcpy(buf, info, 16);
604 #endif
605
606         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
607                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
608                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
611                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
612 #endif
613                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
614         }
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
617                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
618                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
619                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
620                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
621                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
622         }
623 #endif
624 }
625 #endif
626
627 static uint16_t
628 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
629 {
630         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
631         struct bond_dev_private *internals = bd_rx_q->dev_private;
632         struct ether_hdr *eth_h;
633         uint16_t ether_type, offset;
634         uint16_t nb_recv_pkts;
635         int i;
636
637         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
638
639         for (i = 0; i < nb_recv_pkts; i++) {
640                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
641                 ether_type = eth_h->ether_type;
642                 offset = get_vlan_offset(eth_h, &ether_type);
643
644                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
645 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
646                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
647 #endif
648                         bond_mode_alb_arp_recv(eth_h, offset, internals);
649                 }
650 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
651                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
652                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
653 #endif
654         }
655
656         return nb_recv_pkts;
657 }
658
659 static uint16_t
660 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
661                 uint16_t nb_pkts)
662 {
663         struct bond_dev_private *internals;
664         struct bond_tx_queue *bd_tx_q;
665
666         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
667         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
668
669         uint16_t num_of_slaves;
670         uint16_t slaves[RTE_MAX_ETHPORTS];
671
672         uint16_t num_tx_total = 0, num_tx_slave;
673
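        /*
         * Note: slave_idx is static, so the round-robin rotation point is
         * shared by every TX queue of every bonded device in the process.
         */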
674         static int slave_idx = 0;
675         int i, cslave_idx = 0, tx_fail_total = 0;
676
677         bd_tx_q = (struct bond_tx_queue *)queue;
678         internals = bd_tx_q->dev_private;
679
680         /* Copy slave list to protect against slave up/down changes during tx
681          * bursting */
682         num_of_slaves = internals->active_slave_count;
683         memcpy(slaves, internals->active_slaves,
684                         sizeof(internals->active_slaves[0]) * num_of_slaves);
685
686         if (num_of_slaves < 1)
687                 return num_tx_total;
688
689         /* Populate each slave's mbuf array with the packets to be sent on it */
690         for (i = 0; i < nb_pkts; i++) {
691                 cslave_idx = (slave_idx + i) % num_of_slaves;
692                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
693         }
694
695         /* increment current slave index so the next call to tx burst starts on the
696          * next slave */
697         slave_idx = ++cslave_idx;
698
699         /* Send packet burst on each slave device */
700         for (i = 0; i < num_of_slaves; i++) {
701                 if (slave_nb_pkts[i] > 0) {
702                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
703                                         slave_bufs[i], slave_nb_pkts[i]);
704
705                         /* if tx burst fails move packets to end of bufs */
706                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
707                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
708
709                                 tx_fail_total += tx_fail_slave;
710
711                                 memcpy(&bufs[nb_pkts - tx_fail_total],
712                                        &slave_bufs[i][num_tx_slave],
713                                        tx_fail_slave * sizeof(bufs[0]));
714                         }
715                         num_tx_total += num_tx_slave;
716                 }
717         }
718
719         return num_tx_total;
720 }
721
722 static uint16_t
723 bond_ethdev_tx_burst_active_backup(void *queue,
724                 struct rte_mbuf **bufs, uint16_t nb_pkts)
725 {
726         struct bond_dev_private *internals;
727         struct bond_tx_queue *bd_tx_q;
728
729         bd_tx_q = (struct bond_tx_queue *)queue;
730         internals = bd_tx_q->dev_private;
731
732         if (internals->active_slave_count < 1)
733                 return 0;
734
735         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
736                         bufs, nb_pkts);
737 }
738
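/*
 * XOR-fold helpers over the Ethernet, IPv4 and IPv6 headers, used by the
 * transmit-policy hash functions below.
 */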
739 static inline uint16_t
740 ether_hash(struct ether_hdr *eth_hdr)
741 {
742         unaligned_uint16_t *word_src_addr =
743                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
744         unaligned_uint16_t *word_dst_addr =
745                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
746
747         return (word_src_addr[0] ^ word_dst_addr[0]) ^
748                         (word_src_addr[1] ^ word_dst_addr[1]) ^
749                         (word_src_addr[2] ^ word_dst_addr[2]);
750 }
751
752 static inline uint32_t
753 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
754 {
755         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
756 }
757
758 static inline uint32_t
759 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
760 {
761         unaligned_uint32_t *word_src_addr =
762                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
763         unaligned_uint32_t *word_dst_addr =
764                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
765
766         return (word_src_addr[0] ^ word_dst_addr[0]) ^
767                         (word_src_addr[1] ^ word_dst_addr[1]) ^
768                         (word_src_addr[2] ^ word_dst_addr[2]) ^
769                         (word_src_addr[3] ^ word_dst_addr[3]);
770 }
771
772
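/*
 * The burst_xmit_*_hash functions map each packet in the burst to a slave
 * index in [0, slave_count) according to the layer 2, 2+3 or 3+4 policy.
 */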
773 void
774 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
775                 uint8_t slave_count, uint16_t *slaves)
776 {
777         struct ether_hdr *eth_hdr;
778         uint32_t hash;
779         int i;
780
781         for (i = 0; i < nb_pkts; i++) {
782                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
783
784                 hash = ether_hash(eth_hdr);
785
786                 slaves[i] = (hash ^= hash >> 8) % slave_count;
787         }
788 }
789
790 void
791 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
792                 uint8_t slave_count, uint16_t *slaves)
793 {
794         uint16_t i;
795         struct ether_hdr *eth_hdr;
796         uint16_t proto;
797         size_t vlan_offset;
798         uint32_t hash, l3hash;
799
800         for (i = 0; i < nb_pkts; i++) {
801                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
802                 l3hash = 0;
803
804                 proto = eth_hdr->ether_type;
805                 hash = ether_hash(eth_hdr);
806
807                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
808
809                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
810                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
811                                         ((char *)(eth_hdr + 1) + vlan_offset);
812                         l3hash = ipv4_hash(ipv4_hdr);
813
814                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
815                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
816                                         ((char *)(eth_hdr + 1) + vlan_offset);
817                         l3hash = ipv6_hash(ipv6_hdr);
818                 }
819
820                 hash = hash ^ l3hash;
821                 hash ^= hash >> 16;
822                 hash ^= hash >> 8;
823
824                 slaves[i] = hash % slave_count;
825         }
826 }
827
828 void
829 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
830                 uint8_t slave_count, uint16_t *slaves)
831 {
832         struct ether_hdr *eth_hdr;
833         uint16_t proto;
834         size_t vlan_offset;
835         int i;
836
837         struct udp_hdr *udp_hdr;
838         struct tcp_hdr *tcp_hdr;
839         uint32_t hash, l3hash, l4hash;
840
841         for (i = 0; i < nb_pkts; i++) {
842                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
843                 proto = eth_hdr->ether_type;
844                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
845                 l3hash = 0;
846                 l4hash = 0;
847
848                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
849                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
850                                         ((char *)(eth_hdr + 1) + vlan_offset);
851                         size_t ip_hdr_offset;
852
853                         l3hash = ipv4_hash(ipv4_hdr);
854
855                         /* there is no L4 header in fragmented packet */
856                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
857                                                                 == 0)) {
858                                 ip_hdr_offset = (ipv4_hdr->version_ihl
859                                         & IPV4_HDR_IHL_MASK) *
860                                         IPV4_IHL_MULTIPLIER;
861
862                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
863                                         tcp_hdr = (struct tcp_hdr *)
864                                                 ((char *)ipv4_hdr +
865                                                         ip_hdr_offset);
866                                         l4hash = HASH_L4_PORTS(tcp_hdr);
867                                 } else if (ipv4_hdr->next_proto_id ==
868                                                                 IPPROTO_UDP) {
869                                         udp_hdr = (struct udp_hdr *)
870                                                 ((char *)ipv4_hdr +
871                                                         ip_hdr_offset);
872                                         l4hash = HASH_L4_PORTS(udp_hdr);
873                                 }
874                         }
875                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
876                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
877                                         ((char *)(eth_hdr + 1) + vlan_offset);
878                         l3hash = ipv6_hash(ipv6_hdr);
879
880                         if (ipv6_hdr->proto == IPPROTO_TCP) {
881                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
882                                 l4hash = HASH_L4_PORTS(tcp_hdr);
883                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
884                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
885                                 l4hash = HASH_L4_PORTS(udp_hdr);
886                         }
887                 }
888
889                 hash = l3hash ^ l4hash;
890                 hash ^= hash >> 16;
891                 hash ^= hash >> 8;
892
893                 slaves[i] = hash % slave_count;
894         }
895 }
896
897 struct bwg_slave {
898         uint64_t bwg_left_int;
899         uint64_t bwg_left_remainder;
900         uint8_t slave;
901 };
902
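/*
 * Reset the transmitted-bytes snapshot of every active slave so that TLB
 * bandwidth accounting starts fresh.
 */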
903 void
904 bond_tlb_activate_slave(struct bond_dev_private *internals) {
905         int i;
906
907         for (i = 0; i < internals->active_slave_count; i++) {
908                 tlb_last_obytets[internals->active_slaves[i]] = 0;
909         }
910 }
911
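/* qsort comparator: order slaves by bandwidth left, largest first. */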
912 static int
913 bandwidth_cmp(const void *a, const void *b)
914 {
915         const struct bwg_slave *bwg_a = a;
916         const struct bwg_slave *bwg_b = b;
917         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
918         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
919                         (int64_t)bwg_a->bwg_left_remainder;
920         if (diff > 0)
921                 return 1;
922         else if (diff < 0)
923                 return -1;
924         else if (diff2 > 0)
925                 return 1;
926         else if (diff2 < 0)
927                 return -1;
928         else
929                 return 0;
930 }
931
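/*
 * Compute how much of a slave's link bandwidth is left in the current
 * reordering window, given the bytes it transmitted since the last update.
 */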
932 static void
933 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
934                 struct bwg_slave *bwg_slave)
935 {
936         struct rte_eth_link link_status;
937
938         rte_eth_link_get_nowait(port_id, &link_status);
939         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
940         if (link_bwg == 0)
941                 return;
942         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
943         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
944         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
945 }
946
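/*
 * Periodic alarm callback: estimate the bandwidth left on each active slave
 * and rewrite tlb_slaves_order so the least loaded slave is tried first.
 * Re-arms itself every REORDER_PERIOD_MS.
 */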
947 static void
948 bond_ethdev_update_tlb_slave_cb(void *arg)
949 {
950         struct bond_dev_private *internals = arg;
951         struct rte_eth_stats slave_stats;
952         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
953         uint8_t slave_count;
954         uint64_t tx_bytes;
955
956         uint8_t update_stats = 0;
957         uint8_t i, slave_id;
958
959         internals->slave_update_idx++;
960
961
962         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
963                 update_stats = 1;
964
965         for (i = 0; i < internals->active_slave_count; i++) {
966                 slave_id = internals->active_slaves[i];
967                 rte_eth_stats_get(slave_id, &slave_stats);
968                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
969                 bandwidth_left(slave_id, tx_bytes,
970                                 internals->slave_update_idx, &bwg_array[i]);
971                 bwg_array[i].slave = slave_id;
972
973                 if (update_stats) {
974                         tlb_last_obytets[slave_id] = slave_stats.obytes;
975                 }
976         }
977
978         if (update_stats == 1)
979                 internals->slave_update_idx = 0;
980
981         slave_count = i;
982         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
983         for (i = 0; i < slave_count; i++)
984                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
985
986         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
987                         (struct bond_dev_private *)internals);
988 }
989
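/*
 * TLB (mode 5) TX: walk the slaves in tlb_slaves_order, rewriting the source
 * MAC of packets that still carry the primary's MAC to the current slave's
 * address, and burst until all packets have been sent.
 */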
990 static uint16_t
991 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
992 {
993         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
994         struct bond_dev_private *internals = bd_tx_q->dev_private;
995
996         struct rte_eth_dev *primary_port =
997                         &rte_eth_devices[internals->primary_port];
998         uint16_t num_tx_total = 0;
999         uint16_t i, j;
1000
1001         uint16_t num_of_slaves = internals->active_slave_count;
1002         uint16_t slaves[RTE_MAX_ETHPORTS];
1003
1004         struct ether_hdr *ether_hdr;
1005         struct ether_addr primary_slave_addr;
1006         struct ether_addr active_slave_addr;
1007
1008         if (num_of_slaves < 1)
1009                 return num_tx_total;
1010
1011         memcpy(slaves, internals->tlb_slaves_order,
1012                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1013
1014
1015         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1016
1017         if (nb_pkts > 3) {
1018                 for (i = 0; i < 3; i++)
1019                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1020         }
1021
1022         for (i = 0; i < num_of_slaves; i++) {
1023                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1024                 for (j = num_tx_total; j < nb_pkts; j++) {
1025                         if (j + 3 < nb_pkts)
1026                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1027
1028                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1029                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1030                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1031 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1032                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1033 #endif
1034                 }
1035
1036                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1037                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1038
1039                 if (num_tx_total == nb_pkts)
1040                         break;
1041         }
1042
1043         return num_tx_total;
1044 }
1045
1046 void
1047 bond_tlb_disable(struct bond_dev_private *internals)
1048 {
1049         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1050 }
1051
1052 void
1053 bond_tlb_enable(struct bond_dev_private *internals)
1054 {
1055         bond_ethdev_update_tlb_slave_cb(internals);
1056 }
1057
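/*
 * ALB (mode 6) TX: ARP packets are assigned to slaves by the ALB client
 * table (with their source MAC rewritten to the chosen slave's), pending
 * ARP update packets are generated for known clients, and all other traffic
 * is sent using the TLB policy.
 */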
1058 static uint16_t
1059 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1060 {
1061         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1062         struct bond_dev_private *internals = bd_tx_q->dev_private;
1063
1064         struct ether_hdr *eth_h;
1065         uint16_t ether_type, offset;
1066
1067         struct client_data *client_info;
1068
1069         /*
1070          * We create transmit buffers for every slave and one additional to send
1071          * through TLB. In the worst case every packet will be sent on one port.
1072          */
1073         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1074         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1075
1076         /*
1077          * We create separate transmit buffers for update packets as they won't
1078          * be counted in num_tx_total.
1079          */
1080         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1081         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1082
1083         struct rte_mbuf *upd_pkt;
1084         size_t pkt_size;
1085
1086         uint16_t num_send, num_not_send = 0;
1087         uint16_t num_tx_total = 0;
1088         uint16_t slave_idx;
1089
1090         int i, j;
1091
1092         /* Search tx buffer for ARP packets and forward them to alb */
1093         for (i = 0; i < nb_pkts; i++) {
1094                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1095                 ether_type = eth_h->ether_type;
1096                 offset = get_vlan_offset(eth_h, &ether_type);
1097
1098                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1099                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1100
1101                         /* Change src mac in eth header */
1102                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1103
1104                         /* Add packet to slave tx buffer */
1105                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1106                         slave_bufs_pkts[slave_idx]++;
1107                 } else {
1108                         /* If packet is not ARP, send it with TLB policy */
1109                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1110                                         bufs[i];
1111                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1112                 }
1113         }
1114
1115         /* Update connected client ARP tables */
1116         if (internals->mode6.ntt) {
1117                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1118                         client_info = &internals->mode6.client_table[i];
1119
1120                         if (client_info->in_use) {
1121                                 /* Allocate new packet to send ARP update on current slave */
1122                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1123                                 if (upd_pkt == NULL) {
1124                                         RTE_BOND_LOG(ERR,
1125                                                      "Failed to allocate ARP packet from pool");
1126                                         continue;
1127                                 }
1128                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1129                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1130                                 upd_pkt->data_len = pkt_size;
1131                                 upd_pkt->pkt_len = pkt_size;
1132
1133                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1134                                                 internals);
1135
1136                                 /* Add packet to update tx buffer */
1137                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1138                                 update_bufs_pkts[slave_idx]++;
1139                         }
1140                 }
1141                 internals->mode6.ntt = 0;
1142         }
1143
1144         /* Send ARP packets on proper slaves */
1145         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1146                 if (slave_bufs_pkts[i] > 0) {
1147                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1148                                         slave_bufs[i], slave_bufs_pkts[i]);
1149                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1150                                 bufs[nb_pkts - 1 - num_not_send - j] =
1151                                                 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1152                         }
1153
1154                         num_tx_total += num_send;
1155                         num_not_send += slave_bufs_pkts[i] - num_send;
1156
1157 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1158                         /* Print TX stats including update packets */
1159                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1160                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1161                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1162                         }
1163 #endif
1164                 }
1165         }
1166
1167         /* Send update packets on proper slaves */
1168         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1169                 if (update_bufs_pkts[i] > 0) {
1170                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1171                                         update_bufs_pkts[i]);
1172                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1173                                 rte_pktmbuf_free(update_bufs[i][j]);
1174                         }
1175 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1176                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1177                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1178                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1179                         }
1180 #endif
1181                 }
1182         }
1183
1184         /* Send non-ARP packets using tlb policy */
1185         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1186                 num_send = bond_ethdev_tx_burst_tlb(queue,
1187                                 slave_bufs[RTE_MAX_ETHPORTS],
1188                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1189
1190                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1191                         bufs[nb_pkts - 1 - num_not_send - j] =
1192                                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1193                 }
1194
1195                 num_tx_total += num_send;
1196         }
1197
1198         return num_tx_total;
1199 }
1200
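/*
 * Balance (mode 2) TX: hash every packet to one active slave according to
 * the configured xmit policy, burst the per-slave groups, and compact any
 * unsent packets to the tail of bufs.
 */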
1201 static uint16_t
1202 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1203                 uint16_t nb_bufs)
1204 {
1205         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1206         struct bond_dev_private *internals = bd_tx_q->dev_private;
1207
1208         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1209         uint16_t slave_count;
1210
1211         /* Per-slave arrays into which mbufs are sorted for transmission */
1212         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1213         /* Number of mbufs for transmission on each slave */
1214         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1215         /* Mapping array generated by hash function to map mbufs to slaves */
1216         uint16_t bufs_slave_port_idxs[nb_bufs];
1217
1218         uint16_t slave_tx_count;
1219         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1220
1221         uint16_t i;
1222
1223         if (unlikely(nb_bufs == 0))
1224                 return 0;
1225
1226         /* Copy slave list to protect against slave up/down changes during tx
1227          * bursting */
1228         slave_count = internals->active_slave_count;
1229         if (unlikely(slave_count < 1))
1230                 return 0;
1231
1232         memcpy(slave_port_ids, internals->active_slaves,
1233                         sizeof(slave_port_ids[0]) * slave_count);
1234
1235         /*
1236          * Populate each slave's mbuf array with the packets to be sent on
1237          * it, selecting the output slave via the xmit-policy hash
1238          */
1239         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1240                         bufs_slave_port_idxs);
1241
1242         for (i = 0; i < nb_bufs; i++) {
1243                 /* Populate slave mbuf arrays with mbufs for that slave. */
1244                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1245
1246                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1247         }
1248
1249         /* Send packet burst on each slave device */
1250         for (i = 0; i < slave_count; i++) {
1251                 if (slave_nb_bufs[i] == 0)
1252                         continue;
1253
1254                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1255                                 bd_tx_q->queue_id, slave_bufs[i],
1256                                 slave_nb_bufs[i]);
1257
1258                 total_tx_count += slave_tx_count;
1259
1260                 /* If tx burst fails move packets to end of bufs */
1261                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1262                         int slave_tx_fail_count = slave_nb_bufs[i] -
1263                                         slave_tx_count;
1264                         total_tx_fail_count += slave_tx_fail_count;
1265                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
1266                                &slave_bufs[i][slave_tx_count],
1267                                slave_tx_fail_count * sizeof(bufs[0]));
1268                 }
1269         }
1270
1271         return total_tx_count;
1272 }
1273
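/*
 * Mode 4 TX on shared queues: distribute data packets across the slaves in
 * DISTRIBUTING state using the xmit-policy hash, then drain any pending LACP
 * control frames from each slave's tx_ring.
 */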
1274 static uint16_t
1275 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1276                 uint16_t nb_bufs)
1277 {
1278         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1279         struct bond_dev_private *internals = bd_tx_q->dev_private;
1280
1281         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1282         uint16_t slave_count;
1283
1284         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1285         uint16_t dist_slave_count;
1286
1287         /* Per-slave 2-D array into which mbufs are sorted for transmission */
1288         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1289         /* Number of mbufs for transmission on each slave */
1290         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1291         /* Mapping array generated by hash function to map mbufs to slaves */
1292         uint16_t bufs_slave_port_idxs[nb_bufs];
1293
1294         uint16_t slave_tx_count;
1295         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1296
1297         uint16_t i;
1298
1299         if (unlikely(nb_bufs == 0))
1300                 return 0;
1301
1302         /* Copy slave list to protect against slave up/down changes during tx
1303          * bursting */
1304         slave_count = internals->active_slave_count;
1305         if (unlikely(slave_count < 1))
1306                 return 0;
1307
1308         memcpy(slave_port_ids, internals->active_slaves,
1309                         sizeof(slave_port_ids[0]) * slave_count);
1310
1311         dist_slave_count = 0;
1312         for (i = 0; i < slave_count; i++) {
1313                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1314
1315                 if (ACTOR_STATE(port, DISTRIBUTING))
1316                         dist_slave_port_ids[dist_slave_count++] =
1317                                         slave_port_ids[i];
1318         }
1319
1320         if (likely(dist_slave_count > 0)) {
1321
1322                 /*
1323                  * Populate each slave's mbuf array with the packets to be sent
1324                  * on it, selecting the output slave via the xmit-policy hash
1325                  */
1326                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1327                                 bufs_slave_port_idxs);
1328
1329                 for (i = 0; i < nb_bufs; i++) {
1330                         /*
1331                          * Populate slave mbuf arrays with mbufs for that
1332                          * slave
1333                          */
1334                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1335
1336                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1337                                         bufs[i];
1338                 }
1339
1340
1341                 /* Send packet burst on each slave device */
1342                 for (i = 0; i < dist_slave_count; i++) {
1343                         if (slave_nb_bufs[i] == 0)
1344                                 continue;
1345
1346                         slave_tx_count = rte_eth_tx_burst(
1347                                         dist_slave_port_ids[i],
1348                                         bd_tx_q->queue_id, slave_bufs[i],
1349                                         slave_nb_bufs[i]);
1350
1351                         total_tx_count += slave_tx_count;
1352
1353                         /* If tx burst fails move packets to end of bufs */
1354                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1355                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1356                                                 slave_tx_count;
1357                                 total_tx_fail_count += slave_tx_fail_count;
1358
1359                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1360                                        &slave_bufs[i][slave_tx_count],
1361                                        slave_tx_fail_count * sizeof(bufs[0]));
1362                         }
1363                 }
1364         }
1365
1366         /* Check for LACP control packets and send if available */
1367         for (i = 0; i < slave_count; i++) {
1368                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1369                 struct rte_mbuf *ctrl_pkt = NULL;
1370
1371                 if (likely(rte_ring_empty(port->tx_ring)))
1372                         continue;
1373
1374                 if (rte_ring_dequeue(port->tx_ring,
1375                                      (void **)&ctrl_pkt) != -ENOENT) {
1376                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1377                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1378                         /*
1379                          * re-enqueue LAG control plane packets to buffering
1380                          * ring if transmission fails so the packet isn't lost.
1381                          */
1382                         if (slave_tx_count != 1)
1383                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1384                 }
1385         }
1386
1387         return total_tx_count;
1388 }
1389
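/*
 * Illustrative sketch (not part of the driver): the hash used above by
 * burst_xmit_hash() is chosen by the transmit policy configured through the
 * public API in rte_eth_bond.h. A hypothetical application, assuming
 * "bond_port" is a bonded port in balance or 802.3ad mode, might select the
 * layer 3+4 policy so flows are spread by IP addresses and L4 ports:
 *
 *	#include <rte_eth_bond.h>
 *
 *	if (rte_eth_bond_xmit_policy_set(bond_port,
 *			BALANCE_XMIT_POLICY_LAYER34) != 0)
 *		printf("failed to set xmit policy\n");
 */
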
1390 static uint16_t
1391 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1392                 uint16_t nb_pkts)
1393 {
1394         struct bond_dev_private *internals;
1395         struct bond_tx_queue *bd_tx_q;
1396
1397         uint8_t tx_failed_flag = 0, num_of_slaves;
1398         uint16_t slaves[RTE_MAX_ETHPORTS];
1399
1400         uint16_t max_nb_of_tx_pkts = 0;
1401
1402         int slave_tx_total[RTE_MAX_ETHPORTS];
1403         int i, most_successful_tx_slave = -1;
1404
1405         bd_tx_q = (struct bond_tx_queue *)queue;
1406         internals = bd_tx_q->dev_private;
1407
1408         /* Copy slave list to protect against slave up/down changes during tx
1409          * bursting */
1410         num_of_slaves = internals->active_slave_count;
1411         memcpy(slaves, internals->active_slaves,
1412                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1413
1414         if (num_of_slaves < 1)
1415                 return 0;
1416
1417         /* Increment reference count on mbufs */
1418         for (i = 0; i < nb_pkts; i++)
1419                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1420
1421         /* Transmit burst on each active slave */
1422         for (i = 0; i < num_of_slaves; i++) {
1423                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1424                                         bufs, nb_pkts);
1425
1426                 if (unlikely(slave_tx_total[i] < nb_pkts))
1427                         tx_failed_flag = 1;
1428
1429                 /* record the value and slave index for the slave which transmits the
1430                  * maximum number of packets */
1431                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1432                         max_nb_of_tx_pkts = slave_tx_total[i];
1433                         most_successful_tx_slave = i;
1434                 }
1435         }
1436
1437         /* if slaves fail to transmit packets from burst, the calling application
1438          * is not expected to know about multiple references to packets so we must
1439          * handle failures of all packets except those of the most successful slave
1440          */
1441         if (unlikely(tx_failed_flag))
1442                 for (i = 0; i < num_of_slaves; i++)
1443                         if (i != most_successful_tx_slave)
1444                                 while (slave_tx_total[i] < nb_pkts)
1445                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1446
1447         return max_nb_of_tx_pkts;
1448 }
1449
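/*
 * Illustrative sketch (not part of the driver): broadcast mode above relies
 * on standard mbuf reference counting. Every extra reference taken with
 * rte_mbuf_refcnt_update() must be released by one rte_pktmbuf_free() before
 * the mbuf returns to its pool; assuming "mp" is an existing mempool:
 *
 *	struct rte_mbuf *m = rte_pktmbuf_alloc(mp);	// refcnt == 1
 *
 *	rte_mbuf_refcnt_update(m, 2);	// refcnt == 3: three pending frees
 *	rte_pktmbuf_free(m);		// refcnt == 2, mbuf still valid
 *	rte_pktmbuf_free(m);		// refcnt == 1, mbuf still valid
 *	rte_pktmbuf_free(m);		// refcnt == 0, returned to "mp"
 */
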
1450 void
1451 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1452 {
1453         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1454
1455         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1456                 /**
1457                  * If in mode 4 then save the link properties of the first
1458                  * slave; all subsequent slaves must match these properties
1459                  */
1460                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1461
1462                 bond_link->link_autoneg = slave_link->link_autoneg;
1463                 bond_link->link_duplex = slave_link->link_duplex;
1464                 bond_link->link_speed = slave_link->link_speed;
1465         } else {
1466                 /**
1467                  * In any other mode the link properties are set to default
1468                  * values of AUTONEG/DUPLEX
1469                  */
1470                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1471                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1472         }
1473 }
1474
1475 int
1476 link_properties_valid(struct rte_eth_dev *ethdev,
1477                 struct rte_eth_link *slave_link)
1478 {
1479         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1480
1481         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1482                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1483
1484                 if (bond_link->link_duplex != slave_link->link_duplex ||
1485                         bond_link->link_autoneg != slave_link->link_autoneg ||
1486                         bond_link->link_speed != slave_link->link_speed)
1487                         return -1;
1488         }
1489
1490         return 0;
1491 }
1492
1493 int
1494 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1495 {
1496         struct ether_addr *mac_addr;
1497
1498         if (eth_dev == NULL) {
1499                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1500                 return -1;
1501         }
1502
1503         if (dst_mac_addr == NULL) {
1504                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1505                 return -1;
1506         }
1507
1508         mac_addr = eth_dev->data->mac_addrs;
1509
1510         ether_addr_copy(mac_addr, dst_mac_addr);
1511         return 0;
1512 }
1513
1514 int
1515 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1516 {
1517         struct ether_addr *mac_addr;
1518
1519         if (eth_dev == NULL) {
1520                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1521                 return -1;
1522         }
1523
1524         if (new_mac_addr == NULL) {
1525                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1526                 return -1;
1527         }
1528
1529         mac_addr = eth_dev->data->mac_addrs;
1530
1531         /* If the new MAC is different from the current MAC then update it */
1532         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1533                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1534
1535         return 0;
1536 }
1537
1538 static const struct ether_addr null_mac_addr;
1539
1540 /*
1541  * Add additional MAC addresses to the slave
1542  */
1543 int
1544 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1545                 uint16_t slave_port_id)
1546 {
1547         int i, ret;
1548         struct ether_addr *mac_addr;
1549
1550         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1551                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1552                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1553                         break;
1554
1555                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1556                 if (ret < 0) {
1557                         /* rollback */
1558                         for (i--; i > 0; i--)
1559                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1560                                         &bonded_eth_dev->data->mac_addrs[i]);
1561                         return ret;
1562                 }
1563         }
1564
1565         return 0;
1566 }
1567
1568 /*
1569  * Remove additional MAC addresses from the slave
1570  */
1571 int
1572 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1573                 uint16_t slave_port_id)
1574 {
1575         int i, rc, ret;
1576         struct ether_addr *mac_addr;
1577
1578         rc = 0;
1579         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1580                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1581                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1582                         break;
1583
1584                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1585                 /* save only the first error */
1586                 if (ret < 0 && rc == 0)
1587                         rc = ret;
1588         }
1589
1590         return rc;
1591 }
1592
1593 int
1594 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1595 {
1596         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1597         int i;
1598
1599         /* Update slave devices' MAC addresses */
1600         if (internals->slave_count < 1)
1601                 return -1;
1602
1603         switch (internals->mode) {
1604         case BONDING_MODE_ROUND_ROBIN:
1605         case BONDING_MODE_BALANCE:
1606         case BONDING_MODE_BROADCAST:
1607                 for (i = 0; i < internals->slave_count; i++) {
1608                         if (rte_eth_dev_default_mac_addr_set(
1609                                         internals->slaves[i].port_id,
1610                                         bonded_eth_dev->data->mac_addrs)) {
1611                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1612                                                 internals->slaves[i].port_id);
1613                                 return -1;
1614                         }
1615                 }
1616                 break;
1617         case BONDING_MODE_8023AD:
1618                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1619                 break;
1620         case BONDING_MODE_ACTIVE_BACKUP:
1621         case BONDING_MODE_TLB:
1622         case BONDING_MODE_ALB:
1623         default:
1624                 for (i = 0; i < internals->slave_count; i++) {
1625                         if (internals->slaves[i].port_id ==
1626                                         internals->current_primary_port) {
1627                                 if (rte_eth_dev_default_mac_addr_set(
1628                                                 internals->primary_port,
1629                                                 bonded_eth_dev->data->mac_addrs)) {
1630                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1631                                                         internals->current_primary_port);
1632                                         return -1;
1633                                 }
1634                         } else {
1635                                 if (rte_eth_dev_default_mac_addr_set(
1636                                                 internals->slaves[i].port_id,
1637                                                 &internals->slaves[i].persisted_mac_addr)) {
1638                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1639                                                         internals->slaves[i].port_id);
1640                                         return -1;
1641                                 }
1642                         }
1643                 }
1644         }
1645
1646         return 0;
1647 }
1648
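/*
 * Illustrative sketch (not part of the driver): applications normally reach
 * the propagation above indirectly, by assigning a MAC address to the
 * bonded port through the public API; assuming "bond_port" is a valid
 * bonded port id:
 *
 *	#include <rte_eth_bond.h>
 *
 *	struct ether_addr addr = {
 *		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };
 *
 *	if (rte_eth_bond_mac_address_set(bond_port, &addr) != 0)
 *		printf("failed to set bonded MAC address\n");
 */
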
1649 int
1650 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1651 {
1652         struct bond_dev_private *internals;
1653
1654         internals = eth_dev->data->dev_private;
1655
1656         switch (mode) {
1657         case BONDING_MODE_ROUND_ROBIN:
1658                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1659                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1660                 break;
1661         case BONDING_MODE_ACTIVE_BACKUP:
1662                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1663                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1664                 break;
1665         case BONDING_MODE_BALANCE:
1666                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1667                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1668                 break;
1669         case BONDING_MODE_BROADCAST:
1670                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1671                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1672                 break;
1673         case BONDING_MODE_8023AD:
1674                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1675                         return -1;
1676
1677                 if (internals->mode4.dedicated_queues.enabled == 0) {
1678                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1679                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1680                         RTE_BOND_LOG(WARNING,
1681                                 "Using mode 4, it is necessary to do TX burst "
1682                                 "and RX burst at least every 100ms.");
1683                 } else {
1684                         /* Use flow director's optimization */
1685                         eth_dev->rx_pkt_burst =
1686                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1687                         eth_dev->tx_pkt_burst =
1688                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1689                 }
1690                 break;
1691         case BONDING_MODE_TLB:
1692                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1693                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1694                 break;
1695         case BONDING_MODE_ALB:
1696                 if (bond_mode_alb_enable(eth_dev) != 0)
1697                         return -1;
1698
1699                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1700                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1701                 break;
1702         default:
1703                 return -1;
1704         }
1705
1706         internals->mode = mode;
1707
1708         return 0;
1709 }
1710
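/*
 * Illustrative sketch (not part of the driver): a hypothetical application
 * selecting mode 4 at creation time. As the warning above notes, without
 * dedicated queues the application must keep calling the rx/tx burst
 * functions so the LACP state machines can run; "slave0" and "slave1" are
 * assumed to be valid port ids:
 *
 *	#include <rte_eth_bond.h>
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_8023AD, rte_socket_id());
 *	if (bond_port < 0)
 *		rte_exit(EXIT_FAILURE, "cannot create bonded device\n");
 *
 *	rte_eth_bond_slave_add(bond_port, slave0);
 *	rte_eth_bond_slave_add(bond_port, slave1);
 *
 *	// after configure/start: poll at least every 100 ms
 *	for (;;) {
 *		struct rte_mbuf *pkts[32];
 *		uint16_t nb = rte_eth_rx_burst(bond_port, 0, pkts, 32);
 *		uint16_t nt = rte_eth_tx_burst(bond_port, 0, pkts, nb);
 *
 *		while (nt < nb)
 *			rte_pktmbuf_free(pkts[nt++]);
 *	}
 */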
1711
1712 static int
1713 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1714                 struct rte_eth_dev *slave_eth_dev)
1715 {
1716         int errval = 0;
1717         struct bond_dev_private *internals = (struct bond_dev_private *)
1718                 bonded_eth_dev->data->dev_private;
1719         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1720
1721         if (port->slow_pool == NULL) {
1722                 char mem_name[256];
1723                 int slave_id = slave_eth_dev->data->port_id;
1724
1725                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1726                                 slave_id);
1727                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1728                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1729                         slave_eth_dev->data->numa_node);
1730
1731                 /* Any memory allocation failure in initialization is critical because
1732                  * resources can't be freed, so reinitialization is impossible. */
1733                 if (port->slow_pool == NULL) {
1734                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1735                                 slave_id, mem_name, rte_strerror(rte_errno));
1736                 }
1737         }
1738
1739         if (internals->mode4.dedicated_queues.enabled == 1) {
1740                 /* Configure slow Rx queue */
1741
1742                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1743                                 internals->mode4.dedicated_queues.rx_qid, 128,
1744                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1745                                 NULL, port->slow_pool);
1746                 if (errval != 0) {
1747                         RTE_BOND_LOG(ERR,
1748                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1749                                         slave_eth_dev->data->port_id,
1750                                         internals->mode4.dedicated_queues.rx_qid,
1751                                         errval);
1752                         return errval;
1753                 }
1754
1755                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1756                                 internals->mode4.dedicated_queues.tx_qid, 512,
1757                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1758                                 NULL);
1759                 if (errval != 0) {
1760                         RTE_BOND_LOG(ERR,
1761                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1762                                 slave_eth_dev->data->port_id,
1763                                 internals->mode4.dedicated_queues.tx_qid,
1764                                 errval);
1765                         return errval;
1766                 }
1767         }
1768         return 0;
1769 }
1770
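/*
 * Illustrative sketch (not part of the driver): the slow queue above is only
 * set up once dedicated queues have been requested through the mode-4 API
 * declared in rte_eth_bond_8023ad.h, which must happen while the bonded
 * port is stopped, before its data queues are configured:
 *
 *	#include <rte_eth_bond_8023ad.h>
 *
 *	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port) != 0)
 *		printf("could not enable dedicated queues\n");
 */
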
1771 int
1772 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1773                 struct rte_eth_dev *slave_eth_dev)
1774 {
1775         struct bond_rx_queue *bd_rx_q;
1776         struct bond_tx_queue *bd_tx_q;
1777         uint16_t nb_rx_queues;
1778         uint16_t nb_tx_queues;
1779
1780         int errval;
1781         uint16_t q_id;
1782         struct rte_flow_error flow_error;
1783
1784         struct bond_dev_private *internals = (struct bond_dev_private *)
1785                 bonded_eth_dev->data->dev_private;
1786
1787         /* Stop slave */
1788         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1789
1790         /* Enable interrupts on slave device if supported */
1791         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1792                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1793
1794         /* If RSS is enabled for bonding, try to enable it for slaves  */
1795         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1796                 if (internals->rss_key_len != 0) {
1797                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1798                                         internals->rss_key_len;
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1800                                         internals->rss_key;
1801                 } else {
1802                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1803                 }
1804
1805                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1806                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1807                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1808                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1809         }
1810
1811         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1812                         DEV_RX_OFFLOAD_VLAN_FILTER)
1813                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1814                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1815         else
1816                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1817                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1818
1819         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1820         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1821
1822         if (internals->mode == BONDING_MODE_8023AD) {
1823                 if (internals->mode4.dedicated_queues.enabled == 1) {
1824                         nb_rx_queues++;
1825                         nb_tx_queues++;
1826                 }
1827         }
1828
1829         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1830                                      bonded_eth_dev->data->mtu);
1831         if (errval != 0 && errval != -ENOTSUP) {
1832                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1833                                 slave_eth_dev->data->port_id, errval);
1834                 return errval;
1835         }
1836
1837         /* Configure device */
1838         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1839                         nb_rx_queues, nb_tx_queues,
1840                         &(slave_eth_dev->data->dev_conf));
1841         if (errval != 0) {
1842                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1843                                 slave_eth_dev->data->port_id, errval);
1844                 return errval;
1845         }
1846
1847         /* Setup Rx Queues */
1848         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1849                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1850
1851                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1852                                 bd_rx_q->nb_rx_desc,
1853                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1854                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1855                 if (errval != 0) {
1856                         RTE_BOND_LOG(ERR,
1857                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1858                                         slave_eth_dev->data->port_id, q_id, errval);
1859                         return errval;
1860                 }
1861         }
1862
1863         /* Setup Tx Queues */
1864         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1865                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1866
1867                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1868                                 bd_tx_q->nb_tx_desc,
1869                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1870                                 &bd_tx_q->tx_conf);
1871                 if (errval != 0) {
1872                         RTE_BOND_LOG(ERR,
1873                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1874                                 slave_eth_dev->data->port_id, q_id, errval);
1875                         return errval;
1876                 }
1877         }
1878
1879         if (internals->mode == BONDING_MODE_8023AD &&
1880                         internals->mode4.dedicated_queues.enabled == 1) {
1881                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1882                 if (errval != 0)
1883                         return errval;
1884
1885                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1886                                 slave_eth_dev->data->port_id) != 0) {
1887                         RTE_BOND_LOG(ERR,
1888                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1889                                 slave_eth_dev->data->port_id);
1890                         return -1;
1891                 }
1892
1893                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1894                         rte_flow_destroy(slave_eth_dev->data->port_id,
1895                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1896                                         &flow_error);
1897
1898                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1899                                 slave_eth_dev->data->port_id);
1900         }
1901
1902         /* Start device */
1903         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1904         if (errval != 0) {
1905                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1906                                 slave_eth_dev->data->port_id, errval);
1907                 return -1;
1908         }
1909
1910         /* If RSS is enabled for bonding, synchronize RETA */
1911         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1912                 int i;
1913                 struct bond_dev_private *internals;
1914
1915                 internals = bonded_eth_dev->data->dev_private;
1916
1917                 for (i = 0; i < internals->slave_count; i++) {
1918                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1919                                 errval = rte_eth_dev_rss_reta_update(
1920                                                 slave_eth_dev->data->port_id,
1921                                                 &internals->reta_conf[0],
1922                                                 internals->slaves[i].reta_size);
1923                                 if (errval != 0) {
1924                                         RTE_BOND_LOG(WARNING,
1925                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1926                                                      " RSS Configuration for bonding may be inconsistent.",
1927                                                      slave_eth_dev->data->port_id, errval);
1928                                 }
1929                                 break;
1930                         }
1931                 }
1932         }
1933
1934         /* If lsc interrupt is set, check initial slave's link status */
1935         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1936                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1937                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1938                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1939                         NULL);
1940         }
1941
1942         return 0;
1943 }
1944
1945 void
1946 slave_remove(struct bond_dev_private *internals,
1947                 struct rte_eth_dev *slave_eth_dev)
1948 {
1949         uint8_t i;
1950
1951         for (i = 0; i < internals->slave_count; i++)
1952                 if (internals->slaves[i].port_id ==
1953                                 slave_eth_dev->data->port_id)
1954                         break;
1955
1956         if (i < (internals->slave_count - 1)) {
1957                 struct rte_flow *flow;
1958
1959                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1960                                 sizeof(internals->slaves[0]) *
1961                                 (internals->slave_count - i - 1));
1962                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1963                         memmove(&flow->flows[i], &flow->flows[i + 1],
1964                                 sizeof(flow->flows[0]) *
1965                                 (internals->slave_count - i - 1));
1966                         flow->flows[internals->slave_count - 1] = NULL;
1967                 }
1968         }
1969
1970         internals->slave_count--;
1971
1972         /* force reconfiguration of slave interfaces */
1973         _rte_eth_dev_reset(slave_eth_dev);
1974 }
1975
1976 static void
1977 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1978
1979 void
1980 slave_add(struct bond_dev_private *internals,
1981                 struct rte_eth_dev *slave_eth_dev)
1982 {
1983         struct bond_slave_details *slave_details =
1984                         &internals->slaves[internals->slave_count];
1985
1986         slave_details->port_id = slave_eth_dev->data->port_id;
1987         slave_details->last_link_status = 0;
1988
1989         /* Mark slave devices that don't support interrupts so we can
1990          * compensate when we start the bond
1991          */
1992         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1993                 slave_details->link_status_poll_enabled = 1;
1994         }
1995
1996         slave_details->link_status_wait_to_complete = 0;
1997         /* save the slave's MAC address so it can be restored on removal */
1998         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1999                         sizeof(struct ether_addr));
2000 }
2001
2002 void
2003 bond_ethdev_primary_set(struct bond_dev_private *internals,
2004                 uint16_t slave_port_id)
2005 {
2006         int i;
2007
2008         if (internals->active_slave_count < 1)
2009                 internals->current_primary_port = slave_port_id;
2010         else
2011                 /* Search bonded device slave ports for new proposed primary port */
2012                 for (i = 0; i < internals->active_slave_count; i++) {
2013                         if (internals->active_slaves[i] == slave_port_id)
2014                                 internals->current_primary_port = slave_port_id;
2015                 }
2016 }
2017
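/*
 * Illustrative sketch (not part of the driver): the helper above backs the
 * public primary-port API; an application running active-backup mode might
 * pin the primary explicitly, assuming "bond_port" and "slave0" are valid:
 *
 *	#include <rte_eth_bond.h>
 *
 *	if (rte_eth_bond_primary_set(bond_port, slave0) != 0)
 *		printf("failed to set primary slave\n");
 */
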
2018 static void
2019 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2020
2021 static int
2022 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2023 {
2024         struct bond_dev_private *internals;
2025         int i;
2026
2027         /* slave eth dev will be started by bonded device */
2028         if (check_for_bonded_ethdev(eth_dev)) {
2029                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2030                                 eth_dev->data->port_id);
2031                 return -1;
2032         }
2033
2034         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2035         eth_dev->data->dev_started = 1;
2036
2037         internals = eth_dev->data->dev_private;
2038
2039         if (internals->slave_count == 0) {
2040                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2041                 goto out_err;
2042         }
2043
2044         if (internals->user_defined_mac == 0) {
2045                 struct ether_addr *new_mac_addr = NULL;
2046
2047                 for (i = 0; i < internals->slave_count; i++)
2048                         if (internals->slaves[i].port_id == internals->primary_port)
2049                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2050
2051                 if (new_mac_addr == NULL)
2052                         goto out_err;
2053
2054                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2055                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2056                                         eth_dev->data->port_id);
2057                         goto out_err;
2058                 }
2059         }
2060
2061         /* If bonded device is configured in promiscuous mode then re-apply config */
2062         if (internals->promiscuous_en)
2063                 bond_ethdev_promiscuous_enable(eth_dev);
2064
2065         if (internals->mode == BONDING_MODE_8023AD) {
2066                 if (internals->mode4.dedicated_queues.enabled == 1) {
2067                         internals->mode4.dedicated_queues.rx_qid =
2068                                         eth_dev->data->nb_rx_queues;
2069                         internals->mode4.dedicated_queues.tx_qid =
2070                                         eth_dev->data->nb_tx_queues;
2071                 }
2072         }
2073
2074
2075         /* Reconfigure each slave device if starting bonded device */
2076         for (i = 0; i < internals->slave_count; i++) {
2077                 struct rte_eth_dev *slave_ethdev =
2078                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2079                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2080                         RTE_BOND_LOG(ERR,
2081                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2082                                 eth_dev->data->port_id,
2083                                 internals->slaves[i].port_id);
2084                         goto out_err;
2085                 }
2086                 /* We will need to poll for link status if any slave doesn't
2087                  * support interrupts
2088                  */
2089                 if (internals->slaves[i].link_status_poll_enabled)
2090                         internals->link_status_polling_enabled = 1;
2091         }
2092
2093         /* start polling if needed */
2094         if (internals->link_status_polling_enabled) {
2095                 rte_eal_alarm_set(
2096                         internals->link_status_polling_interval_ms * 1000,
2097                         bond_ethdev_slave_link_status_change_monitor,
2098                         (void *)&rte_eth_devices[internals->port_id]);
2099         }
2100
2101         /* Update all slave devices' MAC addresses */
2102         if (mac_address_slaves_update(eth_dev) != 0)
2103                 goto out_err;
2104
2105         if (internals->user_defined_primary_port)
2106                 bond_ethdev_primary_set(internals, internals->primary_port);
2107
2108         if (internals->mode == BONDING_MODE_8023AD)
2109                 bond_mode_8023ad_start(eth_dev);
2110
2111         if (internals->mode == BONDING_MODE_TLB ||
2112                         internals->mode == BONDING_MODE_ALB)
2113                 bond_tlb_enable(internals);
2114
2115         return 0;
2116
2117 out_err:
2118         eth_dev->data->dev_started = 0;
2119         return -1;
2120 }
2121
2122 static void
2123 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2124 {
2125         uint16_t i;
2126
2127         if (dev->data->rx_queues != NULL) {
2128                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2129                         rte_free(dev->data->rx_queues[i]);
2130                         dev->data->rx_queues[i] = NULL;
2131                 }
2132                 dev->data->nb_rx_queues = 0;
2133         }
2134
2135         if (dev->data->tx_queues != NULL) {
2136                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2137                         rte_free(dev->data->tx_queues[i]);
2138                         dev->data->tx_queues[i] = NULL;
2139                 }
2140                 dev->data->nb_tx_queues = 0;
2141         }
2142 }
2143
2144 void
2145 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2146 {
2147         struct bond_dev_private *internals = eth_dev->data->dev_private;
2148         uint8_t i;
2149
2150         if (internals->mode == BONDING_MODE_8023AD) {
2151                 struct port *port;
2152                 void *pkt = NULL;
2153
2154                 bond_mode_8023ad_stop(eth_dev);
2155
2156                 /* Discard all messages to/from mode 4 state machines */
2157                 for (i = 0; i < internals->active_slave_count; i++) {
2158                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2159
2160                         RTE_ASSERT(port->rx_ring != NULL);
2161                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2162                                 rte_pktmbuf_free(pkt);
2163
2164                         RTE_ASSERT(port->tx_ring != NULL);
2165                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2166                                 rte_pktmbuf_free(pkt);
2167                 }
2168         }
2169
2170         if (internals->mode == BONDING_MODE_TLB ||
2171                         internals->mode == BONDING_MODE_ALB) {
2172                 bond_tlb_disable(internals);
2173                 for (i = 0; i < internals->active_slave_count; i++)
2174                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2175         }
2176
2177         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2178         eth_dev->data->dev_started = 0;
2179
2180         internals->link_status_polling_enabled = 0;
2181         for (i = 0; i < internals->slave_count; i++) {
2182                 internals->slaves[i].last_link_status = 0;
2183                 rte_eth_dev_stop(internals->slaves[i].port_id);
2184                 deactivate_slave(eth_dev, internals->slaves[i].port_id);
2185         }
2186 }
2187
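/*
 * Illustrative sketch (not part of the driver): from the application's
 * point of view the teardown above is reached through the generic ethdev
 * calls; stopping the bonded port also stops and deactivates every slave,
 * so the slaves do not have to be stopped one by one first:
 *
 *	rte_eth_dev_stop(bond_port);	// stops and deactivates all slaves
 *	rte_eth_dev_close(bond_port);	// removes slaves and frees queues
 */
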
2188 void
2189 bond_ethdev_close(struct rte_eth_dev *dev)
2190 {
2191         struct bond_dev_private *internals = dev->data->dev_private;
2192         uint16_t bond_port_id = internals->port_id;
2193         int skipped = 0;
2194         struct rte_flow_error ferror;
2195
2196         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2197         while (internals->slave_count != skipped) {
2198                 uint16_t port_id = internals->slaves[skipped].port_id;
2199
2200                 rte_eth_dev_stop(port_id);
2201
2202                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2203                         RTE_BOND_LOG(ERR,
2204                                      "Failed to remove port %d from bonded device %s",
2205                                      port_id, dev->device->name);
2206                         skipped++;
2207                 }
2208         }
2209         bond_flow_ops.flush(dev, &ferror);
2210         bond_ethdev_free_queues(dev);
2211         rte_bitmap_reset(internals->vlan_filter_bmp);
2212 }
2213
2214 /* forward declaration */
2215 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2216
2217 static void
2218 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2219 {
2220         struct bond_dev_private *internals = dev->data->dev_private;
2221
2222         uint16_t max_nb_rx_queues = UINT16_MAX;
2223         uint16_t max_nb_tx_queues = UINT16_MAX;
2224
2225         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2226
2227         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2228                         internals->candidate_max_rx_pktlen :
2229                         ETHER_MAX_JUMBO_FRAME_LEN;
2230
2231         /* Max number of tx/rx queues that the bonded device can support is
2232          * the minimum across all bonded slaves, as all slaves must be capable
2233          * of supporting the same number of tx/rx queues.
2234          */
2235         if (internals->slave_count > 0) {
2236                 struct rte_eth_dev_info slave_info;
2237                 uint8_t idx;
2238
2239                 for (idx = 0; idx < internals->slave_count; idx++) {
2240                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2241                                         &slave_info);
2242
2243                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2244                                 max_nb_rx_queues = slave_info.max_rx_queues;
2245
2246                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2247                                 max_nb_tx_queues = slave_info.max_tx_queues;
2248                 }
2249         }
2250
2251         dev_info->max_rx_queues = max_nb_rx_queues;
2252         dev_info->max_tx_queues = max_nb_tx_queues;
2253
2254         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2255                sizeof(dev_info->default_rxconf));
2256         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2257                sizeof(dev_info->default_txconf));
2258
2259         memcpy(&dev_info->rx_desc_lim, &internals->rx_desc_lim,
2260                sizeof(dev_info->rx_desc_lim));
2261         memcpy(&dev_info->tx_desc_lim, &internals->tx_desc_lim,
2262                sizeof(dev_info->tx_desc_lim));
2263
2264         /**
2265          * If dedicated hw queues are enabled for the bonded device in LACP
2266          * mode, we need to reduce the maximum number of data path queues by 1.
2267          */
2268         if (internals->mode == BONDING_MODE_8023AD &&
2269                 internals->mode4.dedicated_queues.enabled == 1) {
2270                 dev_info->max_rx_queues--;
2271                 dev_info->max_tx_queues--;
2272         }
2273
2274         dev_info->min_rx_bufsize = 0;
2275
2276         dev_info->rx_offload_capa = internals->rx_offload_capa;
2277         dev_info->tx_offload_capa = internals->tx_offload_capa;
2278         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2279         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2280         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2281
2282         dev_info->reta_size = internals->reta_size;
2283 }
2284
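/*
 * Illustrative sketch (not part of the driver): an application can read the
 * aggregated limits computed above before configuring queues, keeping its
 * queue counts within the smallest slave's capabilities; "port_conf" is an
 * assumed, already initialised rte_eth_conf:
 *
 *	struct rte_eth_dev_info info;
 *
 *	rte_eth_dev_info_get(bond_port, &info);
 *	uint16_t nb_q = RTE_MIN((uint16_t)4, info.max_rx_queues);
 *	rte_eth_dev_configure(bond_port, nb_q, nb_q, &port_conf);
 */
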
2285 static int
2286 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2287 {
2288         int res;
2289         uint16_t i;
2290         struct bond_dev_private *internals = dev->data->dev_private;
2291
2292         /* don't do this while a slave is being added */
2293         rte_spinlock_lock(&internals->lock);
2294
2295         if (on)
2296                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2297         else
2298                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2299
2300         for (i = 0; i < internals->slave_count; i++) {
2301                 uint16_t port_id = internals->slaves[i].port_id;
2302
2303                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2304                 if (res == -ENOTSUP)
2305                         RTE_BOND_LOG(WARNING,
2306                                      "Setting VLAN filter on slave port %u not supported.",
2307                                      port_id);
2308         }
2309
2310         rte_spinlock_unlock(&internals->lock);
2311         return 0;
2312 }
2313
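/*
 * Illustrative sketch (not part of the driver): a VLAN filter set on the
 * bonded port is fanned out to every slave by the handler above; assuming
 * DEV_RX_OFFLOAD_VLAN_FILTER was enabled when the port was configured:
 *
 *	if (rte_eth_dev_vlan_filter(bond_port, 100, 1) != 0)
 *		printf("failed to add a filter for VLAN 100\n");
 */
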
2314 static int
2315 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2316                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2317                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2318 {
2319         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2320                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2321                                         0, dev->data->numa_node);
2322         if (bd_rx_q == NULL)
2323                 return -1;
2324
2325         bd_rx_q->queue_id = rx_queue_id;
2326         bd_rx_q->dev_private = dev->data->dev_private;
2327
2328         bd_rx_q->nb_rx_desc = nb_rx_desc;
2329
2330         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2331         bd_rx_q->mb_pool = mb_pool;
2332
2333         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2334
2335         return 0;
2336 }
2337
2338 static int
2339 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2340                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2341                 const struct rte_eth_txconf *tx_conf)
2342 {
2343         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2344                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2345                                         0, dev->data->numa_node);
2346
2347         if (bd_tx_q == NULL)
2348                 return -1;
2349
2350         bd_tx_q->queue_id = tx_queue_id;
2351         bd_tx_q->dev_private = dev->data->dev_private;
2352
2353         bd_tx_q->nb_tx_desc = nb_tx_desc;
2354         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2355
2356         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2357
2358         return 0;
2359 }
2360
2361 static void
2362 bond_ethdev_rx_queue_release(void *queue)
2363 {
2364         if (queue == NULL)
2365                 return;
2366
2367         rte_free(queue);
2368 }
2369
2370 static void
2371 bond_ethdev_tx_queue_release(void *queue)
2372 {
2373         if (queue == NULL)
2374                 return;
2375
2376         rte_free(queue);
2377 }
2378
2379 static void
2380 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2381 {
2382         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2383         struct bond_dev_private *internals;
2384
2385         /* Default value for polling slave found is true as we don't want to
2386          * disable the polling thread if we cannot get the lock */
2387         int i, polling_slave_found = 1;
2388
2389         if (cb_arg == NULL)
2390                 return;
2391
2392         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2393         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2394
2395         if (!bonded_ethdev->data->dev_started ||
2396                 !internals->link_status_polling_enabled)
2397                 return;
2398
2399         /* If device is currently being configured then don't check slaves'
2400          * link status; wait until the next period */
2401         if (rte_spinlock_trylock(&internals->lock)) {
2402                 if (internals->slave_count > 0)
2403                         polling_slave_found = 0;
2404
2405                 for (i = 0; i < internals->slave_count; i++) {
2406                         if (!internals->slaves[i].link_status_poll_enabled)
2407                                 continue;
2408
2409                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2410                         polling_slave_found = 1;
2411
2412                         /* Update slave link status */
2413                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2414                                         internals->slaves[i].link_status_wait_to_complete);
2415
2416                         /* if link status has changed since last checked then call lsc
2417                          * event callback */
2418                         if (slave_ethdev->data->dev_link.link_status !=
2419                                         internals->slaves[i].last_link_status) {
2420                                 internals->slaves[i].last_link_status =
2421                                                 slave_ethdev->data->dev_link.link_status;
2422
2423                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2424                                                 RTE_ETH_EVENT_INTR_LSC,
2425                                                 &bonded_ethdev->data->port_id,
2426                                                 NULL);
2427                         }
2428                 }
2429                 rte_spinlock_unlock(&internals->lock);
2430         }
2431
2432         if (polling_slave_found)
2433                 /* Set alarm to continue monitoring link status of slave ethdevs */
2434                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2435                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2436 }
2437
2438 static int
2439 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2440 {
2441         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2442
2443         struct bond_dev_private *bond_ctx;
2444         struct rte_eth_link slave_link;
2445
2446         uint32_t idx;
2447
2448         bond_ctx = ethdev->data->dev_private;
2449
2450         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2451
2452         if (ethdev->data->dev_started == 0 ||
2453                         bond_ctx->active_slave_count == 0) {
2454                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2455                 return 0;
2456         }
2457
2458         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2459
2460         if (wait_to_complete)
2461                 link_update = rte_eth_link_get;
2462         else
2463                 link_update = rte_eth_link_get_nowait;
2464
2465         switch (bond_ctx->mode) {
2466         case BONDING_MODE_BROADCAST:
2467                 /**
2468                  * Setting link speed to UINT32_MAX to ensure we pick up the
2469                  * value of the first active slave
2470                  */
2471                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2472
2473                 /**
2474                  * link speed is the minimum of all the slaves' link speeds,
2475                  * as packet loss will occur on the slowest slave if
2476                  * transmission at a higher rate is attempted
2477                  */
2478                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2479                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2480
2481                         if (slave_link.link_speed <
2482                                         ethdev->data->dev_link.link_speed)
2483                                 ethdev->data->dev_link.link_speed =
2484                                                 slave_link.link_speed;
2485                 }
2486                 break;
2487         case BONDING_MODE_ACTIVE_BACKUP:
2488                 /* Current primary slave */
2489                 link_update(bond_ctx->current_primary_port, &slave_link);
2490
2491                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2492                 break;
2493         case BONDING_MODE_8023AD:
2494                 ethdev->data->dev_link.link_autoneg =
2495                                 bond_ctx->mode4.slave_link.link_autoneg;
2496                 ethdev->data->dev_link.link_duplex =
2497                                 bond_ctx->mode4.slave_link.link_duplex;
2498                 /* fall through to update link speed */
2499         case BONDING_MODE_ROUND_ROBIN:
2500         case BONDING_MODE_BALANCE:
2501         case BONDING_MODE_TLB:
2502         case BONDING_MODE_ALB:
2503         default:
2504                 /**
2505                  * In these modes the maximum theoretical link speed is the
2506                  * sum of all the slaves' link speeds
2507                  */
2508                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2509
2510                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2511                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2512
2513                         ethdev->data->dev_link.link_speed +=
2514                                         slave_link.link_speed;
2515                 }
2516         }
2517
2518
2519         return 0;
2520 }
2521
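/*
 * Illustrative sketch (not part of the driver): the aggregate speed
 * computed above is what an application observes through the generic
 * link API:
 *
 *	struct rte_eth_link link;
 *
 *	rte_eth_link_get_nowait(bond_port, &link);
 *	printf("bond link %s, %u Mbps\n",
 *		link.link_status ? "up" : "down", link.link_speed);
 */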
2522
2523 static int
2524 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2525 {
2526         struct bond_dev_private *internals = dev->data->dev_private;
2527         struct rte_eth_stats slave_stats;
2528         int i, j;
2529
2530         for (i = 0; i < internals->slave_count; i++) {
2531                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2532
2533                 stats->ipackets += slave_stats.ipackets;
2534                 stats->opackets += slave_stats.opackets;
2535                 stats->ibytes += slave_stats.ibytes;
2536                 stats->obytes += slave_stats.obytes;
2537                 stats->imissed += slave_stats.imissed;
2538                 stats->ierrors += slave_stats.ierrors;
2539                 stats->oerrors += slave_stats.oerrors;
2540                 stats->rx_nombuf += slave_stats.rx_nombuf;
2541
2542                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2543                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2544                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2545                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2546                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2547                         stats->q_errors[j] += slave_stats.q_errors[j];
2548                 }
2549
2550         }
2551
2552         return 0;
2553 }
2554
2555 static void
2556 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2557 {
2558         struct bond_dev_private *internals = dev->data->dev_private;
2559         int i;
2560
2561         for (i = 0; i < internals->slave_count; i++)
2562                 rte_eth_stats_reset(internals->slaves[i].port_id);
2563 }
2564
2565 static void
2566 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2567 {
2568         struct bond_dev_private *internals = eth_dev->data->dev_private;
2569         int i;
2570
2571         internals->promiscuous_en = 1;
2572
2573         switch (internals->mode) {
2574         /* Promiscuous mode is propagated to all slaves */
2575         case BONDING_MODE_ROUND_ROBIN:
2576         case BONDING_MODE_BALANCE:
2577         case BONDING_MODE_BROADCAST:
2578                 for (i = 0; i < internals->slave_count; i++)
2579                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2580                 break;
2581         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2582         case BONDING_MODE_8023AD:
2583                 break;
2584         /* Promiscuous mode is propagated only to primary slave */
2585         case BONDING_MODE_ACTIVE_BACKUP:
2586         case BONDING_MODE_TLB:
2587         case BONDING_MODE_ALB:
2588         default:
2589                 rte_eth_promiscuous_enable(internals->current_primary_port);
2590         }
2591 }
2592
2593 static void
2594 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2595 {
2596         struct bond_dev_private *internals = dev->data->dev_private;
2597         int i;
2598
2599         internals->promiscuous_en = 0;
2600
2601         switch (internals->mode) {
2602         /* Promiscuous mode is propagated to all slaves */
2603         case BONDING_MODE_ROUND_ROBIN:
2604         case BONDING_MODE_BALANCE:
2605         case BONDING_MODE_BROADCAST:
2606                 for (i = 0; i < internals->slave_count; i++)
2607                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2608                 break;
2609         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2610         case BONDING_MODE_8023AD:
2611                 break;
2612         /* Promiscuous mode is propagated only to primary slave */
2613         case BONDING_MODE_ACTIVE_BACKUP:
2614         case BONDING_MODE_TLB:
2615         case BONDING_MODE_ALB:
2616         default:
2617                 rte_eth_promiscuous_disable(internals->current_primary_port);
2618         }
2619 }
2620
2621 static void
2622 bond_ethdev_delayed_lsc_propagation(void *arg)
2623 {
2624         if (arg == NULL)
2625                 return;
2626
2627         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2628                         RTE_ETH_EVENT_INTR_LSC, NULL);
2629 }
2630
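/*
 * Illustrative sketch (not part of the driver): the (possibly delayed) LSC
 * events propagated here reach applications through the standard ethdev
 * callback mechanism; a hypothetical handler registered on the bonded port:
 *
 *	static int
 *	on_bond_lsc(uint16_t port_id, enum rte_eth_event_type event,
 *			void *cb_arg, void *ret_param)
 *	{
 *		RTE_SET_USED(event);
 *		RTE_SET_USED(cb_arg);
 *		RTE_SET_USED(ret_param);
 *		printf("bonded port %u link changed\n", port_id);
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(bond_port, RTE_ETH_EVENT_INTR_LSC,
 *			on_bond_lsc, NULL);
 */
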
2631 int
2632 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2633                 void *param, void *ret_param __rte_unused)
2634 {
2635         struct rte_eth_dev *bonded_eth_dev;
2636         struct bond_dev_private *internals;
2637         struct rte_eth_link link;
2638         int rc = -1;
2639
2640         int i, valid_slave = 0;
2641         uint8_t active_pos;
2642         uint8_t lsc_flag = 0;
2643
2644         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2645                 return rc;
2646
2647         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2648
2649         if (check_for_bonded_ethdev(bonded_eth_dev))
2650                 return rc;
2651
2652         internals = bonded_eth_dev->data->dev_private;
2653
2654         /* If the device isn't started don't handle interrupts */
2655         if (!bonded_eth_dev->data->dev_started)
2656                 return rc;
2657
2658         /* verify that port_id is a valid slave of bonded port */
2659         for (i = 0; i < internals->slave_count; i++) {
2660                 if (internals->slaves[i].port_id == port_id) {
2661                         valid_slave = 1;
2662                         break;
2663                 }
2664         }
2665
2666         if (!valid_slave)
2667                 return rc;
2668
2669         /* Synchronize parallel lsc callback calls, whether triggered by a
2670          * real link event from a slave PMD or by the bonding PMD itself.
2671          */
2672         rte_spinlock_lock(&internals->lsc_lock);
2673
2674         /* Search for port in active port list */
2675         active_pos = find_slave_by_id(internals->active_slaves,
2676                         internals->active_slave_count, port_id);
2677
2678         rte_eth_link_get_nowait(port_id, &link);
2679         if (link.link_status) {
2680                 if (active_pos < internals->active_slave_count)
2681                         goto link_update;
2682
2683                 /* if there are no active slave ports then make this port the primary port */
2684                 if (internals->active_slave_count < 1) {
2685                         /* If first active slave, then change link status */
2686                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2687                         internals->current_primary_port = port_id;
2688                         lsc_flag = 1;
2689
2690                         mac_address_slaves_update(bonded_eth_dev);
2691                 }
2692
2693                 /* check link state properties if bonded link is up */
2694                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2695                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2696                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2697                                              "for slave %d in bonding mode %d",
2698                                              port_id, internals->mode);
2699                 } else {
2700                         /* inherit slave link properties */
2701                         link_properties_set(bonded_eth_dev, &link);
2702                 }
2703
2704                 activate_slave(bonded_eth_dev, port_id);
2705
2706                 /* If user has defined the primary port then default to using it */
2707                 if (internals->user_defined_primary_port &&
2708                                 internals->primary_port == port_id)
2709                         bond_ethdev_primary_set(internals, port_id);
2710         } else {
2711                 if (active_pos == internals->active_slave_count)
2712                         goto link_update;
2713
2714                 /* Remove from active slave list */
2715                 deactivate_slave(bonded_eth_dev, port_id);
2716
2717                 if (internals->active_slave_count < 1)
2718                         lsc_flag = 1;
2719
2720                 /* Update primary id: take the first active slave from the list,
2721                  * or fall back to the configured primary port if none is active */
2722                 if (port_id == internals->current_primary_port) {
2723                         if (internals->active_slave_count > 0)
2724                                 bond_ethdev_primary_set(internals,
2725                                                 internals->active_slaves[0]);
2726                         else
2727                                 internals->current_primary_port = internals->primary_port;
2728                 }
2729         }
2730
2731 link_update:
2732         /**
2733          * Update bonded device link properties after any change to active
2734          * slaves
2735          */
2736         bond_ethdev_link_update(bonded_eth_dev, 0);
2737
2738         if (lsc_flag) {
2739                 /* Cancel any possible outstanding interrupts if delays are enabled */
2740                 if (internals->link_up_delay_ms > 0 ||
2741                         internals->link_down_delay_ms > 0)
2742                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2743                                         bonded_eth_dev);
2744
2745                 if (bonded_eth_dev->data->dev_link.link_status) {
2746                         if (internals->link_up_delay_ms > 0)
2747                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2748                                                 bond_ethdev_delayed_lsc_propagation,
2749                                                 (void *)bonded_eth_dev);
2750                         else
2751                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2752                                                 RTE_ETH_EVENT_INTR_LSC,
2753                                                 NULL);
2754
2755                 } else {
2756                         if (internals->link_down_delay_ms > 0)
2757                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2758                                                 bond_ethdev_delayed_lsc_propagation,
2759                                                 (void *)bonded_eth_dev);
2760                         else
2761                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2762                                                 RTE_ETH_EVENT_INTR_LSC,
2763                                                 NULL);
2764                 }
2765         }
2766
2767         rte_spinlock_unlock(&internals->lsc_lock);
2768
2769         return 0;
2770 }
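
/*
 * Example (illustrative sketch, not part of this driver): the bonding PMD
 * registers this callback on each slave when the slave is added, passing a
 * pointer to the bonded port id as the callback argument, which is why
 * param is dereferenced as a port id above. Assuming "slave_port_id"
 * identifies the slave and "bond_port_id" the bonded device, the
 * registration looks roughly like:
 *
 *	#include <rte_ethdev.h>
 *
 *	uint16_t bond_port_id;	// port id of the bonded device
 *
 *	// invoke bond_ethdev_lsc_event_callback() on slave link changes
 *	rte_eth_dev_callback_register(slave_port_id,
 *			RTE_ETH_EVENT_INTR_LSC,
 *			bond_ethdev_lsc_event_callback,
 *			&bond_port_id);
 */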
2771
2772 static int
2773 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2774                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2775 {
2776         unsigned i, j;
2777         int result = 0;
2778         int slave_reta_size;
2779         unsigned reta_count;
2780         struct bond_dev_private *internals = dev->data->dev_private;
2781
2782         if (reta_size != internals->reta_size)
2783                 return -EINVAL;
2784
2785         /* Copy RETA table */
2786         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2787
2788         for (i = 0; i < reta_count; i++) {
2789                 internals->reta_conf[i].mask = reta_conf[i].mask;
2790                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2791                         if ((reta_conf[i].mask >> j) & 0x01)
2792                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2793         }
2794
2795         /* Fill rest of array */
2796         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2797                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2798                                 sizeof(internals->reta_conf[0]) * reta_count);
2799
2800         /* Propagate RETA over slaves */
2801         for (i = 0; i < internals->slave_count; i++) {
2802                 slave_reta_size = internals->slaves[i].reta_size;
2803                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2804                                 &internals->reta_conf[0], slave_reta_size);
2805                 if (result < 0)
2806                         return result;
2807         }
2808
2809         return 0;
2810 }
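
/*
 * Example (illustrative sketch, not part of this driver): an application
 * rewrites the redirection table on the bonded port and the function above
 * mirrors it to every slave. "bond_port_id" is assumed valid and the RETA
 * size is assumed to be at most 512 entries here:
 *
 *	#include <string.h>
 *	#include <rte_ethdev.h>
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[512 / RTE_RETA_GROUP_SIZE];
 *	struct rte_eth_dev_info dev_info;
 *	uint16_t i;
 *
 *	rte_eth_dev_info_get(bond_port_id, &dev_info);
 *	memset(reta_conf, 0, sizeof(reta_conf));
 *	for (i = 0; i < dev_info.reta_size; i++) {
 *		// select every entry and spread flows over two Rx queues
 *		reta_conf[i / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
 *		reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] = i % 2;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta_conf, dev_info.reta_size);
 */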
2811
2812 static int
2813 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2814                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2815 {
2816         int i, j;
2817         struct bond_dev_private *internals = dev->data->dev_private;
2818
2819         if (reta_size != internals->reta_size)
2820                 return -EINVAL;
2821
2822         /* Copy RETA table */
2823         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2824                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2825                         if ((reta_conf[i].mask >> j) & 0x01)
2826                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2827
2828         return 0;
2829 }
2830
2831 static int
2832 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2833                 struct rte_eth_rss_conf *rss_conf)
2834 {
2835         int i, result = 0;
2836         struct bond_dev_private *internals = dev->data->dev_private;
2837         struct rte_eth_rss_conf bond_rss_conf;
2838
2839         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2840
2841         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2842
2843         if (bond_rss_conf.rss_hf != 0)
2844                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2845
2846         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2847                         sizeof(internals->rss_key)) {
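                /* a key length of zero selects the default 40-byte RSS key length */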
2848                 if (bond_rss_conf.rss_key_len == 0)
2849                         bond_rss_conf.rss_key_len = 40;
2850                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2851                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2852                                 internals->rss_key_len);
2853         }
2854
2855         for (i = 0; i < internals->slave_count; i++) {
2856                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2857                                 &bond_rss_conf);
2858                 if (result < 0)
2859                         return result;
2860         }
2861
2862         return 0;
2863 }
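
/*
 * Example (illustrative sketch, not part of this driver): updating the RSS
 * hash functions and key on the bonded port at runtime; the update above is
 * fanned out to every slave. "bond_port_id" and the key bytes are
 * placeholders:
 *
 *	#include <stdio.h>
 *	#include <rte_ethdev.h>
 *
 *	uint8_t key[40] = { 0x6d };	// remaining key bytes zero for brevity
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	if (rte_eth_dev_rss_hash_update(bond_port_id, &conf) < 0)
 *		printf("a slave rejected the RSS configuration\n");
 */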
2864
2865 static int
2866 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2867                 struct rte_eth_rss_conf *rss_conf)
2868 {
2869         struct bond_dev_private *internals = dev->data->dev_private;
2870
2871         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2872         rss_conf->rss_key_len = internals->rss_key_len;
2873         if (rss_conf->rss_key)
2874                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2875
2876         return 0;
2877 }
2878
2879 static int
2880 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2881 {
2882         struct rte_eth_dev *slave_eth_dev;
2883         struct bond_dev_private *internals = dev->data->dev_private;
2884         int ret, i;
2885
2886         rte_spinlock_lock(&internals->lock);
2887
2888         for (i = 0; i < internals->slave_count; i++) {
2889                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2890                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2891                         rte_spinlock_unlock(&internals->lock);
2892                         return -ENOTSUP;
2893                 }
2894         }
2895         for (i = 0; i < internals->slave_count; i++) {
2896                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2897                 if (ret < 0) {
2898                         rte_spinlock_unlock(&internals->lock);
2899                         return ret;
2900                 }
2901         }
2902
2903         rte_spinlock_unlock(&internals->lock);
2904         return 0;
2905 }
2906
2907 static int
2908 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2909 {
2910         if (mac_address_set(dev, addr)) {
2911                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2912                 return -EINVAL;
2913         }
2914
2915         return 0;
2916 }
2917
2918 static int
2919 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2920                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2921 {
2922         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2923                 *(const void **)arg = &bond_flow_ops;
2924                 return 0;
2925         }
2926         return -ENOTSUP;
2927 }
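
/*
 * Example (illustrative sketch, not part of this driver): applications do
 * not call filter_ctrl directly; the generic flow API fetches
 * &bond_flow_ops through it with RTE_ETH_FILTER_GENERIC. A minimal flow
 * rule on the bonded port, assuming "bond_port_id" is valid:
 *
 *	#include <rte_flow.h>
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *
 *	// created on the bonded port; the bonding PMD replays the rule
 *	// on each slave
 *	struct rte_flow *flow = rte_flow_create(bond_port_id, &attr,
 *			pattern, actions, &error);
 */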
2928
2929 static int
2930 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2931                                 __rte_unused uint32_t index, uint32_t vmdq)
2932 {
2933         struct rte_eth_dev *slave_eth_dev;
2934         struct bond_dev_private *internals = dev->data->dev_private;
2935         int ret, i;
2936
2937         rte_spinlock_lock(&internals->lock);
2938
2939         for (i = 0; i < internals->slave_count; i++) {
2940                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2941                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2942                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2943                         ret = -ENOTSUP;
2944                         goto end;
2945                 }
2946         }
2947
2948         for (i = 0; i < internals->slave_count; i++) {
2949                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2950                                 mac_addr, vmdq);
2951                 if (ret < 0) {
2952                         /* rollback */
2953                         for (i--; i >= 0; i--)
2954                                 rte_eth_dev_mac_addr_remove(
2955                                         internals->slaves[i].port_id, mac_addr);
2956                         goto end;
2957                 }
2958         }
2959
2960         ret = 0;
2961 end:
2962         rte_spinlock_unlock(&internals->lock);
2963         return ret;
2964 }
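
/*
 * Example (illustrative sketch, not part of this driver): adding a
 * secondary unicast MAC to the bonded port, which the function above
 * applies to all slaves and rolls back on partial failure. Assumes
 * "bond_port_id" is valid; the address is a placeholder:
 *
 *	#include <rte_ethdev.h>
 *	#include <rte_ether.h>
 *
 *	struct ether_addr extra = {
 *		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
 *	};
 *
 *	if (rte_eth_dev_mac_addr_add(bond_port_id, &extra, 0) != 0)
 *		;	// no slave was modified, thanks to the rollback above
 */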
2965
2966 static void
2967 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2968 {
2969         struct rte_eth_dev *slave_eth_dev;
2970         struct bond_dev_private *internals = dev->data->dev_private;
2971         int i;
2972
2973         rte_spinlock_lock(&internals->lock);
2974
2975         for (i = 0; i < internals->slave_count; i++) {
2976                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2977                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2978                         goto end;
2979         }
2980
2981         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2982
2983         for (i = 0; i < internals->slave_count; i++)
2984                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2985                                 mac_addr);
2986
2987 end:
2988         rte_spinlock_unlock(&internals->lock);
2989 }
2990
2991 const struct eth_dev_ops default_dev_ops = {
2992         .dev_start            = bond_ethdev_start,
2993         .dev_stop             = bond_ethdev_stop,
2994         .dev_close            = bond_ethdev_close,
2995         .dev_configure        = bond_ethdev_configure,
2996         .dev_infos_get        = bond_ethdev_info,
2997         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2998         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2999         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3000         .rx_queue_release     = bond_ethdev_rx_queue_release,
3001         .tx_queue_release     = bond_ethdev_tx_queue_release,
3002         .link_update          = bond_ethdev_link_update,
3003         .stats_get            = bond_ethdev_stats_get,
3004         .stats_reset          = bond_ethdev_stats_reset,
3005         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3006         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3007         .reta_update          = bond_ethdev_rss_reta_update,
3008         .reta_query           = bond_ethdev_rss_reta_query,
3009         .rss_hash_update      = bond_ethdev_rss_hash_update,
3010         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3011         .mtu_set              = bond_ethdev_mtu_set,
3012         .mac_addr_set         = bond_ethdev_mac_address_set,
3013         .mac_addr_add         = bond_ethdev_mac_addr_add,
3014         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3015         .filter_ctrl          = bond_filter_ctrl
3016 };
3017
3018 static int
3019 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3020 {
3021         const char *name = rte_vdev_device_name(dev);
3022         uint8_t socket_id = dev->device.numa_node;
3023         struct bond_dev_private *internals = NULL;
3024         struct rte_eth_dev *eth_dev = NULL;
3025         uint32_t vlan_filter_bmp_size;
3026
3027         /* now do all data allocation - for the eth_dev structure
3028          * and internal (private) data
3029          */
3030
3031         /* reserve an ethdev entry */
3032         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3033         if (eth_dev == NULL) {
3034                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3035                 goto err;
3036         }
3037
3038         internals = eth_dev->data->dev_private;
3039         eth_dev->data->nb_rx_queues = (uint16_t)1;
3040         eth_dev->data->nb_tx_queues = (uint16_t)1;
3041
3042         /* Allocate memory for storing MAC addresses */
3043         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3044                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3045         if (eth_dev->data->mac_addrs == NULL) {
3046                 RTE_BOND_LOG(ERR,
3047                              "Failed to allocate %u bytes needed to store MAC addresses",
3048                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3049                 goto err;
3050         }
3051
3052         eth_dev->dev_ops = &default_dev_ops;
3053         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3054
3055         rte_spinlock_init(&internals->lock);
3056         rte_spinlock_init(&internals->lsc_lock);
3057
3058         internals->port_id = eth_dev->data->port_id;
3059         internals->mode = BONDING_MODE_INVALID;
3060         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3061         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3062         internals->burst_xmit_hash = burst_xmit_l2_hash;
3063         internals->user_defined_mac = 0;
3064
3065         internals->link_status_polling_enabled = 0;
3066
3067         internals->link_status_polling_interval_ms =
3068                 DEFAULT_POLLING_INTERVAL_10_MS;
3069         internals->link_down_delay_ms = 0;
3070         internals->link_up_delay_ms = 0;
3071
3072         internals->slave_count = 0;
3073         internals->active_slave_count = 0;
3074         internals->rx_offload_capa = 0;
3075         internals->tx_offload_capa = 0;
3076         internals->rx_queue_offload_capa = 0;
3077         internals->tx_queue_offload_capa = 0;
3078         internals->candidate_max_rx_pktlen = 0;
3079         internals->max_rx_pktlen = 0;
3080
3081         /* Initially allow to choose any offload type */
3082         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3083
3084         memset(&internals->default_rxconf, 0,
3085                sizeof(internals->default_rxconf));
3086         memset(&internals->default_txconf, 0,
3087                sizeof(internals->default_txconf));
3088
3089         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3090         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3091
3092         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3093         memset(internals->slaves, 0, sizeof(internals->slaves));
3094
3095         TAILQ_INIT(&internals->flow_list);
3096         internals->flow_isolated_valid = 0;
3097
3098         /* Set mode 4 default configuration */
3099         bond_mode_8023ad_setup(eth_dev, NULL);
3100         if (bond_ethdev_mode_set(eth_dev, mode)) {
3101                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3102                                  eth_dev->data->port_id, mode);
3103                 goto err;
3104         }
3105
3106         vlan_filter_bmp_size =
3107                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3108         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3109                                                    RTE_CACHE_LINE_SIZE);
3110         if (internals->vlan_filter_bmpmem == NULL) {
3111                 RTE_BOND_LOG(ERR,
3112                              "Failed to allocate vlan bitmap for bonded device %u",
3113                              eth_dev->data->port_id);
3114                 goto err;
3115         }
3116
3117         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3118                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3119         if (internals->vlan_filter_bmp == NULL) {
3120                 RTE_BOND_LOG(ERR,
3121                              "Failed to init vlan bitmap for bonded device %u",
3122                              eth_dev->data->port_id);
3123                 rte_free(internals->vlan_filter_bmpmem);
3124                 goto err;
3125         }
3126
3127         return eth_dev->data->port_id;
3128
3129 err:
3130         rte_free(internals);
3131         if (eth_dev != NULL) {
3132                 rte_free(eth_dev->data->mac_addrs);
3133                 rte_eth_dev_release_port(eth_dev);
3134         }
3135         return -1;
3136 }
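
/*
 * Example (illustrative sketch, not part of this driver): bond_alloc() is
 * also reached through the public bonding API, which creates the vdev and
 * returns the new port id. For an active-backup bond on socket 0:
 *
 *	#include <stdio.h>
 *	#include <rte_eth_bond.h>
 *
 *	int bond_port_id = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, 0);	// socket_id 0
 *	if (bond_port_id < 0)
 *		printf("Failed to create bonded device\n");	// handle error
 */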
3137
3138 static int
3139 bond_probe(struct rte_vdev_device *dev)
3140 {
3141         const char *name;
3142         struct bond_dev_private *internals;
3143         struct rte_kvargs *kvlist;
3144         uint8_t bonding_mode, socket_id;
3145         int arg_count, port_id;
3146         uint8_t agg_mode;
3147         struct rte_eth_dev *eth_dev;
3148
3149         if (!dev)
3150                 return -EINVAL;
3151
3152         name = rte_vdev_device_name(dev);
3153         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3154
3155         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3156             strlen(rte_vdev_device_args(dev)) == 0) {
3157                 eth_dev = rte_eth_dev_attach_secondary(name);
3158                 if (!eth_dev) {
3159                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3160                         return -1;
3161                 }
3162                 /* TODO: request info from primary to set up Rx and Tx */
3163                 eth_dev->dev_ops = &default_dev_ops;
3164                 eth_dev->device = &dev->device;
3165                 rte_eth_dev_probing_finish(eth_dev);
3166                 return 0;
3167         }
3168
3169         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3170                 pmd_bond_init_valid_arguments);
3171         if (kvlist == NULL)
3172                 return -1;
3173
3174         /* Parse link bonding mode */
3175         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3176                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3177                                 &bond_ethdev_parse_slave_mode_kvarg,
3178                                 &bonding_mode) != 0) {
3179                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3180                                         name);
3181                         goto parse_error;
3182                 }
3183         } else {
3184                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3185                                 "device %s", name);
3186                 goto parse_error;
3187         }
3188
3189         /* Parse socket id to create bonding device on */
3190         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3191         if (arg_count == 1) {
3192                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3193                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3194                                 != 0) {
3195                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3196                                         "bonded device %s", name);
3197                         goto parse_error;
3198                 }
3199         } else if (arg_count > 1) {
3200                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3201                                 "bonded device %s", name);
3202                 goto parse_error;
3203         } else {
3204                 socket_id = rte_socket_id();
3205         }
3206
3207         dev->device.numa_node = socket_id;
3208
3209         /* Create link bonding eth device */
3210         port_id = bond_alloc(dev, bonding_mode);
3211         if (port_id < 0) {
3212                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3213                                 "socket %u.", name, bonding_mode, socket_id);
3214                 goto parse_error;
3215         }
3216         internals = rte_eth_devices[port_id].data->dev_private;
3217         internals->kvlist = kvlist;
3218
3219         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3220
3221         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3222                 if (rte_kvargs_process(kvlist,
3223                                 PMD_BOND_AGG_MODE_KVARG,
3224                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3225                                 &agg_mode) != 0) {
3226                         RTE_BOND_LOG(ERR,
3227                                         "Failed to parse agg selection mode for bonded device %s",
3228                                         name);
3229                         goto parse_error;
3230                 }
3231
3232                 if (internals->mode == BONDING_MODE_8023AD)
3233                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3234                                         agg_mode);
3235         } else {
3236                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3237         }
3238
3239         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3240                         "socket %u.", name, port_id, bonding_mode, socket_id);
3241         return 0;
3242
3243 parse_error:
3244         rte_kvargs_free(kvlist);
3245
3246         return -1;
3247 }
3248
3249 static int
3250 bond_remove(struct rte_vdev_device *dev)
3251 {
3252         struct rte_eth_dev *eth_dev;
3253         struct bond_dev_private *internals;
3254         const char *name;
3255
3256         if (!dev)
3257                 return -EINVAL;
3258
3259         name = rte_vdev_device_name(dev);
3260         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3261
3262         /* now free all data allocations - for the eth_dev structure
3263          * and internal (private) data
3264          */
3265
3266         /* find an ethdev entry */
3267         eth_dev = rte_eth_dev_allocated(name);
3268         if (eth_dev == NULL)
3269                 return -ENODEV;
3270
3271         RTE_ASSERT(eth_dev->device == &dev->device);
3272
3273         internals = eth_dev->data->dev_private;
3274         if (internals->slave_count != 0)
3275                 return -EBUSY;
3276
3277         if (eth_dev->data->dev_started == 1) {
3278                 bond_ethdev_stop(eth_dev);
3279                 bond_ethdev_close(eth_dev);
3280         }
3281
3282         eth_dev->dev_ops = NULL;
3283         eth_dev->rx_pkt_burst = NULL;
3284         eth_dev->tx_pkt_burst = NULL;
3285
3287         /* Try to release the mempool used in mode 6. If the bonded
3288          * device is not in mode 6, freeing a NULL pointer is harmless.
3289          */
3290         rte_mempool_free(internals->mode6.mempool);
3291         rte_bitmap_free(internals->vlan_filter_bmp);
3292         rte_free(internals->vlan_filter_bmpmem);
3293         rte_free(eth_dev->data->dev_private);
3294         rte_free(eth_dev->data->mac_addrs);
3295
3296         rte_eth_dev_release_port(eth_dev);
3297
3298         return 0;
3299 }
3300
3301 /* this part will resolve the slave port ids after all the other pdevs and
3302  * vdevs have been allocated */
3303 static int
3304 bond_ethdev_configure(struct rte_eth_dev *dev)
3305 {
3306         const char *name = dev->device->name;
3307         struct bond_dev_private *internals = dev->data->dev_private;
3308         struct rte_kvargs *kvlist = internals->kvlist;
3309         int arg_count;
3310         uint16_t port_id = dev - rte_eth_devices;
3311         uint8_t agg_mode;
3312
3313         static const uint8_t default_rss_key[40] = {
3314                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3315                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3316                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3317                 0xBE, 0xAC, 0x01, 0xFA
3318         };
3319
3320         unsigned i, j;
3321
3322         /*
3323          * If RSS is enabled, fill table with default values and
3324          * set the key to the value specified in the port RSS configuration.
3325          * Fall back to default RSS key if the key is not specified
3326          */
3327         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3328                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3329                         internals->rss_key_len =
3330                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3331                         memcpy(internals->rss_key,
3332                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3333                                internals->rss_key_len);
3334                 } else {
3335                         internals->rss_key_len = sizeof(default_rss_key);
3336                         memcpy(internals->rss_key, default_rss_key,
3337                                internals->rss_key_len);
3338                 }
3339
3340                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3341                         internals->reta_conf[i].mask = ~0LL;
3342                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3343                                 internals->reta_conf[i].reta[j] =
3344                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3345                                                 dev->data->nb_rx_queues;
3346                 }
3347         }
3348
3349         /* set the max_rx_pktlen */
3350         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3351
3352         /*
3353          * if no kvlist, it means that this bonded device has been created
3354          * through the bonding api.
3355          */
3356         if (!kvlist)
3357                 return 0;
3358
3359         /* Parse MAC address for bonded device */
3360         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3361         if (arg_count == 1) {
3362                 struct ether_addr bond_mac;
3363
3364                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3365                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3366                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3367                                      name);
3368                         return -1;
3369                 }
3370
3371                 /* Set MAC address */
3372                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3373                         RTE_BOND_LOG(ERR,
3374                                      "Failed to set mac address on bonded device %s",
3375                                      name);
3376                         return -1;
3377                 }
3378         } else if (arg_count > 1) {
3379                 RTE_BOND_LOG(ERR,
3380                              "MAC address can be specified only once for bonded device %s",
3381                              name);
3382                 return -1;
3383         }
3384
3385         /* Parse/set balance mode transmit policy */
3386         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3387         if (arg_count == 1) {
3388                 uint8_t xmit_policy;
3389
3390                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3391                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3392                     0) {
3393                         RTE_BOND_LOG(INFO,
3394                                      "Invalid xmit policy specified for bonded device %s",
3395                                      name);
3396                         return -1;
3397                 }
3398
3399                 /* Set balance mode transmit policy */
3400                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3401                         RTE_BOND_LOG(ERR,
3402                                      "Failed to set balance xmit policy on bonded device %s",
3403                                      name);
3404                         return -1;
3405                 }
3406         } else if (arg_count > 1) {
3407                 RTE_BOND_LOG(ERR,
3408                              "Transmit policy can be specified only once for bonded device %s",
3409                              name);
3410                 return -1;
3411         }
3412
3413         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3414                 if (rte_kvargs_process(kvlist,
3415                                        PMD_BOND_AGG_MODE_KVARG,
3416                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3417                                        &agg_mode) != 0) {
3418                         RTE_BOND_LOG(ERR,
3419                                      "Failed to parse agg selection mode for bonded device %s",
3420                                      name);
3421                 }
3422                 if (internals->mode == BONDING_MODE_8023AD)
3423                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3424                                                               agg_mode);
3425         }
3426
3427         /* Parse/add slave ports to bonded device */
3428         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3429                 struct bond_ethdev_slave_ports slave_ports;
3430                 unsigned i;
3431
3432                 memset(&slave_ports, 0, sizeof(slave_ports));
3433
3434                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3435                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3436                         RTE_BOND_LOG(ERR,
3437                                      "Failed to parse slave ports for bonded device %s",
3438                                      name);
3439                         return -1;
3440                 }
3441
3442                 for (i = 0; i < slave_ports.slave_count; i++) {
3443                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3444                                 RTE_BOND_LOG(ERR,
3445                                              "Failed to add port %d as slave to bonded device %s",
3446                                              slave_ports.slaves[i], name);
3447                         }
3448                 }
3449
3450         } else {
3451                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3452                 return -1;
3453         }
3454
3455         /* Parse/set primary slave port id*/
3456         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3457         if (arg_count == 1) {
3458                 uint16_t primary_slave_port_id;
3459
3460                 if (rte_kvargs_process(kvlist,
3461                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3462                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3463                                        &primary_slave_port_id) < 0) {
3464                         RTE_BOND_LOG(INFO,
3465                                      "Invalid primary slave port id specified for bonded device %s",
3466                                      name);
3467                         return -1;
3468                 }
3469
3470                 /* Set primary slave port id */
3471                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3472                     != 0) {
3473                         RTE_BOND_LOG(ERR,
3474                                      "Failed to set primary slave port %d on bonded device %s",
3475                                      primary_slave_port_id, name);
3476                         return -1;
3477                 }
3478         } else if (arg_count > 1) {
3479                 RTE_BOND_LOG(INFO,
3480                              "Primary slave can be specified only once for bonded device %s",
3481                              name);
3482                 return -1;
3483         }
3484
3485         /* Parse link status monitor polling interval */
3486         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3487         if (arg_count == 1) {
3488                 uint32_t lsc_poll_interval_ms;
3489
3490                 if (rte_kvargs_process(kvlist,
3491                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3492                                        &bond_ethdev_parse_time_ms_kvarg,
3493                                        &lsc_poll_interval_ms) < 0) {
3494                         RTE_BOND_LOG(INFO,
3495                                      "Invalid lsc polling interval value specified for bonded"
3496                                      " device %s", name);
3497                         return -1;
3498                 }
3499
3500                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3501                     != 0) {
3502                         RTE_BOND_LOG(ERR,
3503                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3504                                      lsc_poll_interval_ms, name);
3505                         return -1;
3506                 }
3507         } else if (arg_count > 1) {
3508                 RTE_BOND_LOG(INFO,
3509                              "LSC polling interval can be specified only once for bonded"
3510                              " device %s", name);
3511                 return -1;
3512         }
3513
3514         /* Parse link up interrupt propagation delay */
3515         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3516         if (arg_count == 1) {
3517                 uint32_t link_up_delay_ms;
3518
3519                 if (rte_kvargs_process(kvlist,
3520                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3521                                        &bond_ethdev_parse_time_ms_kvarg,
3522                                        &link_up_delay_ms) < 0) {
3523                         RTE_BOND_LOG(INFO,
3524                                      "Invalid link up propagation delay value specified for"
3525                                      " bonded device %s", name);
3526                         return -1;
3527                 }
3528
3529                 /* Set link up propagation delay */
3530                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3531                     != 0) {
3532                         RTE_BOND_LOG(ERR,
3533                                      "Failed to set link up propagation delay (%u ms) on bonded"
3534                                      " device %s", link_up_delay_ms, name);
3535                         return -1;
3536                 }
3537         } else if (arg_count > 1) {
3538                 RTE_BOND_LOG(INFO,
3539                              "Link up propagation delay can be specified only once for"
3540                              " bonded device %s", name);
3541                 return -1;
3542         }
3543
3544         /* Parse link down interrupt propagation delay */
3545         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3546         if (arg_count == 1) {
3547                 uint32_t link_down_delay_ms;
3548
3549                 if (rte_kvargs_process(kvlist,
3550                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3551                                        &bond_ethdev_parse_time_ms_kvarg,
3552                                        &link_down_delay_ms) < 0) {
3553                         RTE_BOND_LOG(INFO,
3554                                      "Invalid link down propagation delay value specified for"
3555                                      " bonded device %s", name);
3556                         return -1;
3557                 }
3558
3559                 /* Set link down propagation delay */
3560                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3561                     != 0) {
3562                         RTE_BOND_LOG(ERR,
3563                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3564                                      link_down_delay_ms, name);
3565                         return -1;
3566                 }
3567         } else if (arg_count > 1) {
3568                 RTE_BOND_LOG(INFO,
3569                              "Link down propagation delay can be specified only once for bonded device %s",
3570                              name);
3571                 return -1;
3572         }
3573
3574         return 0;
3575 }
3576
3577 struct rte_vdev_driver pmd_bond_drv = {
3578         .probe = bond_probe,
3579         .remove = bond_remove,
3580 };
3581
3582 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3583 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3584
3585 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3586         "slave=<ifc> "
3587         "primary=<ifc> "
3588         "mode=[0-6] "
3589         "xmit_policy=[l2 | l23 | l34] "
3590         "agg_mode=[count | stable | bandwidth] "
3591         "socket_id=<int> "
3592         "mac=<mac addr> "
3593         "lsc_poll_period_ms=<int> "
3594         "up_delay=<int> "
3595         "down_delay=<int>");
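
/*
 * Example (illustrative, not part of this driver): the kvargs above map to
 * an EAL --vdev string. A two-slave 802.3ad bond with stable aggregator
 * selection could be requested like this (PCI addresses are placeholders):
 *
 *	testpmd -l 0-3 -n 4 --vdev \
 *		'net_bonding0,mode=4,slave=0000:02:00.0,slave=0000:02:00.1,agg_mode=stable' \
 *		-- -i
 */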
3596
3597 int bond_logtype;
3598
3599 RTE_INIT(bond_init_log)
3600 {
3601         bond_logtype = rte_log_register("pmd.net.bond");
3602         if (bond_logtype >= 0)
3603                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3604 }