86e78bde8439679295ccc4fe1e0457115b25b3be
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41                 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
42                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
43
44                 vlan_offset = sizeof(struct vlan_hdr);
45                 *proto = vlan_hdr->eth_proto;
46
47                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48                         vlan_hdr = vlan_hdr + 1;
49                         *proto = vlan_hdr->eth_proto;
50                         vlan_offset += sizeof(struct vlan_hdr);
51                 }
52         }
53         return vlan_offset;
54 }
55
56 static uint16_t
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
58 {
59         struct bond_dev_private *internals;
60
61         uint16_t num_rx_total = 0;
62         uint16_t slave_count;
63         uint16_t active_slave;
64         int i;
65
66         /* Cast to structure, containing bonded device's port id and queue id */
67         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
68         internals = bd_rx_q->dev_private;
69         slave_count = internals->active_slave_count;
70         active_slave = internals->active_slave;
71
72         for (i = 0; i < slave_count && nb_pkts; i++) {
73                 uint16_t num_rx_slave;
74
75                 /* Offset of pointer to *bufs increases as packets are received
76                  * from other slaves */
77                 num_rx_slave =
78                         rte_eth_rx_burst(internals->active_slaves[active_slave],
79                                          bd_rx_q->queue_id,
80                                          bufs + num_rx_total, nb_pkts);
81                 num_rx_total += num_rx_slave;
82                 nb_pkts -= num_rx_slave;
83                 if (++active_slave == slave_count)
84                         active_slave = 0;
85         }
86
87         if (++internals->active_slave == slave_count)
88                 internals->active_slave = 0;
89         return num_rx_total;
90 }
91
92 static uint16_t
93 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *internals;
97
98         /* Cast to structure, containing bonded device's port id and queue id */
99         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
100
101         internals = bd_rx_q->dev_private;
102
103         return rte_eth_rx_burst(internals->current_primary_port,
104                         bd_rx_q->queue_id, bufs, nb_pkts);
105 }
106
107 static inline uint8_t
108 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
109 {
110         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
111
112         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
113                 (ethertype == ether_type_slow_be &&
114                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
115 }
116
117 /*****************************************************************************
118  * Flow director's setup for mode 4 optimization
119  */
120
121 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
122         .dst.addr_bytes = { 0 },
123         .src.addr_bytes = { 0 },
124         .type = RTE_BE16(ETHER_TYPE_SLOW),
125 };
126
127 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
128         .dst.addr_bytes = { 0 },
129         .src.addr_bytes = { 0 },
130         .type = 0xFFFF,
131 };
132
133 static struct rte_flow_item flow_item_8023ad[] = {
134         {
135                 .type = RTE_FLOW_ITEM_TYPE_ETH,
136                 .spec = &flow_item_eth_type_8023ad,
137                 .last = NULL,
138                 .mask = &flow_item_eth_mask_type_8023ad,
139         },
140         {
141                 .type = RTE_FLOW_ITEM_TYPE_END,
142                 .spec = NULL,
143                 .last = NULL,
144                 .mask = NULL,
145         }
146 };
147
148 const struct rte_flow_attr flow_attr_8023ad = {
149         .group = 0,
150         .priority = 0,
151         .ingress = 1,
152         .egress = 0,
153         .reserved = 0,
154 };
155
156 int
157 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
158                 uint16_t slave_port) {
159         struct rte_eth_dev_info slave_info;
160         struct rte_flow_error error;
161         struct bond_dev_private *internals = (struct bond_dev_private *)
162                         (bond_dev->data->dev_private);
163
164         const struct rte_flow_action_queue lacp_queue_conf = {
165                 .index = 0,
166         };
167
168         const struct rte_flow_action actions[] = {
169                 {
170                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
171                         .conf = &lacp_queue_conf
172                 },
173                 {
174                         .type = RTE_FLOW_ACTION_TYPE_END,
175                 }
176         };
177
178         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
179                         flow_item_8023ad, actions, &error);
180         if (ret < 0) {
181                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
182                                 __func__, error.message, slave_port,
183                                 internals->mode4.dedicated_queues.rx_qid);
184                 return -1;
185         }
186
187         rte_eth_dev_info_get(slave_port, &slave_info);
188         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
189                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
190                 RTE_BOND_LOG(ERR,
191                         "%s: Slave %d capabilities doesn't allow to allocate additional queues",
192                         __func__, slave_port);
193                 return -1;
194         }
195
196         return 0;
197 }
198
199 int
200 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
201         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
202         struct bond_dev_private *internals = (struct bond_dev_private *)
203                         (bond_dev->data->dev_private);
204         struct rte_eth_dev_info bond_info;
205         uint16_t idx;
206
207         /* Verify if all slaves in bonding supports flow director and */
208         if (internals->slave_count > 0) {
209                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
210
211                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
212                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
213
214                 for (idx = 0; idx < internals->slave_count; idx++) {
215                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
216                                         internals->slaves[idx].port_id) != 0)
217                                 return -1;
218                 }
219         }
220
221         return 0;
222 }
223
224 int
225 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
226
227         struct rte_flow_error error;
228         struct bond_dev_private *internals = (struct bond_dev_private *)
229                         (bond_dev->data->dev_private);
230
231         struct rte_flow_action_queue lacp_queue_conf = {
232                 .index = internals->mode4.dedicated_queues.rx_qid,
233         };
234
235         const struct rte_flow_action actions[] = {
236                 {
237                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
238                         .conf = &lacp_queue_conf
239                 },
240                 {
241                         .type = RTE_FLOW_ACTION_TYPE_END,
242                 }
243         };
244
245         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
246                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
247         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
248                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
249                                 "(slave_port=%d queue_id=%d)",
250                                 error.message, slave_port,
251                                 internals->mode4.dedicated_queues.rx_qid);
252                 return -1;
253         }
254
255         return 0;
256 }
257
258 static uint16_t
259 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
260                 uint16_t nb_pkts)
261 {
262         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
263         struct bond_dev_private *internals = bd_rx_q->dev_private;
264         uint16_t num_rx_total = 0;      /* Total number of received packets */
265         uint16_t slaves[RTE_MAX_ETHPORTS];
266         uint16_t slave_count;
267         uint16_t active_slave;
268         uint16_t i;
269
270         /* Copy slave list to protect against slave up/down changes during tx
271          * bursting */
272         slave_count = internals->active_slave_count;
273         active_slave = internals->active_slave;
274         memcpy(slaves, internals->active_slaves,
275                         sizeof(internals->active_slaves[0]) * slave_count);
276
277         for (i = 0; i < slave_count && nb_pkts; i++) {
278                 uint16_t num_rx_slave;
279
280                 /* Read packets from this slave */
281                 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
282                                                 bd_rx_q->queue_id,
283                                                 bufs + num_rx_total, nb_pkts);
284                 num_rx_total += num_rx_slave;
285                 nb_pkts -= num_rx_slave;
286
287                 if (++active_slave == slave_count)
288                         active_slave = 0;
289         }
290
291         if (++internals->active_slave == slave_count)
292                 internals->active_slave = 0;
293
294         return num_rx_total;
295 }
296
297 static uint16_t
298 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
299                 uint16_t nb_bufs)
300 {
301         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
302         struct bond_dev_private *internals = bd_tx_q->dev_private;
303
304         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
305         uint16_t slave_count;
306
307         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
308         uint16_t dist_slave_count;
309
310         /* 2-D array to sort mbufs for transmission on each slave into */
311         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
312         /* Number of mbufs for transmission on each slave */
313         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
314         /* Mapping array generated by hash function to map mbufs to slaves */
315         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
316
317         uint16_t slave_tx_count;
318         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
319
320         uint16_t i;
321
322         if (unlikely(nb_bufs == 0))
323                 return 0;
324
325         /* Copy slave list to protect against slave up/down changes during tx
326          * bursting */
327         slave_count = internals->active_slave_count;
328         if (unlikely(slave_count < 1))
329                 return 0;
330
331         memcpy(slave_port_ids, internals->active_slaves,
332                         sizeof(slave_port_ids[0]) * slave_count);
333
334
335         dist_slave_count = 0;
336         for (i = 0; i < slave_count; i++) {
337                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
338
339                 if (ACTOR_STATE(port, DISTRIBUTING))
340                         dist_slave_port_ids[dist_slave_count++] =
341                                         slave_port_ids[i];
342         }
343
344         if (unlikely(dist_slave_count < 1))
345                 return 0;
346
347         /*
348          * Populate slaves mbuf with the packets which are to be sent on it
349          * selecting output slave using hash based on xmit policy
350          */
351         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
352                         bufs_slave_port_idxs);
353
354         for (i = 0; i < nb_bufs; i++) {
355                 /* Populate slave mbuf arrays with mbufs for that slave. */
356                 uint8_t slave_idx = bufs_slave_port_idxs[i];
357
358                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
359         }
360
361
362         /* Send packet burst on each slave device */
363         for (i = 0; i < dist_slave_count; i++) {
364                 if (slave_nb_bufs[i] == 0)
365                         continue;
366
367                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
368                                 bd_tx_q->queue_id, slave_bufs[i],
369                                 slave_nb_bufs[i]);
370
371                 total_tx_count += slave_tx_count;
372
373                 /* If tx burst fails move packets to end of bufs */
374                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
375                         int slave_tx_fail_count = slave_nb_bufs[i] -
376                                         slave_tx_count;
377                         total_tx_fail_count += slave_tx_fail_count;
378                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
379                                &slave_bufs[i][slave_tx_count],
380                                slave_tx_fail_count * sizeof(bufs[0]));
381                 }
382         }
383
384         return total_tx_count;
385 }
386
387
388 static uint16_t
389 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
390                 uint16_t nb_pkts)
391 {
392         /* Cast to structure, containing bonded device's port id and queue id */
393         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
394         struct bond_dev_private *internals = bd_rx_q->dev_private;
395         struct ether_addr bond_mac;
396
397         struct ether_hdr *hdr;
398
399         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
400         uint16_t num_rx_total = 0;      /* Total number of received packets */
401         uint16_t slaves[RTE_MAX_ETHPORTS];
402         uint16_t slave_count, idx;
403
404         uint8_t collecting;  /* current slave collecting status */
405         const uint8_t promisc = internals->promiscuous_en;
406         uint8_t i, j, k;
407         uint8_t subtype;
408
409         rte_eth_macaddr_get(internals->port_id, &bond_mac);
410         /* Copy slave list to protect against slave up/down changes during tx
411          * bursting */
412         slave_count = internals->active_slave_count;
413         memcpy(slaves, internals->active_slaves,
414                         sizeof(internals->active_slaves[0]) * slave_count);
415
416         idx = internals->active_slave;
417         if (idx >= slave_count) {
418                 internals->active_slave = 0;
419                 idx = 0;
420         }
421         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
422                 j = num_rx_total;
423                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
424                                          COLLECTING);
425
426                 /* Read packets from this slave */
427                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
428                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
429
430                 for (k = j; k < 2 && k < num_rx_total; k++)
431                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
432
433                 /* Handle slow protocol packets. */
434                 while (j < num_rx_total) {
435
436                         /* If packet is not pure L2 and is known, skip it */
437                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
438                                 j++;
439                                 continue;
440                         }
441
442                         if (j + 3 < num_rx_total)
443                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
444
445                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
446                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
447
448                         /* Remove packet from array if it is slow packet or slave is not
449                          * in collecting state or bonding interface is not in promiscuous
450                          * mode and packet address does not match. */
451                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
452                                 !collecting || (!promisc &&
453                                         !is_multicast_ether_addr(&hdr->d_addr) &&
454                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
455
456                                 if (hdr->ether_type == ether_type_slow_be) {
457                                         bond_mode_8023ad_handle_slow_pkt(
458                                             internals, slaves[idx], bufs[j]);
459                                 } else
460                                         rte_pktmbuf_free(bufs[j]);
461
462                                 /* Packet is managed by mode 4 or dropped, shift the array */
463                                 num_rx_total--;
464                                 if (j < num_rx_total) {
465                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
466                                                 (num_rx_total - j));
467                                 }
468                         } else
469                                 j++;
470                 }
471                 if (unlikely(++idx == slave_count))
472                         idx = 0;
473         }
474
475         if (++internals->active_slave == slave_count)
476                 internals->active_slave = 0;
477
478         return num_rx_total;
479 }
480
481 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
482 uint32_t burstnumberRX;
483 uint32_t burstnumberTX;
484
485 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
486
487 static void
488 arp_op_name(uint16_t arp_op, char *buf)
489 {
490         switch (arp_op) {
491         case ARP_OP_REQUEST:
492                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
493                 return;
494         case ARP_OP_REPLY:
495                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
496                 return;
497         case ARP_OP_REVREQUEST:
498                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
499                                 "Reverse ARP Request");
500                 return;
501         case ARP_OP_REVREPLY:
502                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
503                                 "Reverse ARP Reply");
504                 return;
505         case ARP_OP_INVREQUEST:
506                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
507                                 "Peer Identify Request");
508                 return;
509         case ARP_OP_INVREPLY:
510                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
511                                 "Peer Identify Reply");
512                 return;
513         default:
514                 break;
515         }
516         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
517         return;
518 }
519 #endif
#define MaxIPv4String	16
/*
 * Format an IPv4 address as dotted decimal text.
 *
 * be_ipv4_addr: address in network (big-endian) byte order.
 * buf:          destination buffer.
 * buf_size:     size of buf in bytes; output is truncated (and still NUL
 *               terminated) if it does not fit. MaxIPv4String bytes are
 *               enough for any address.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	/* In network byte order the octets already lie in memory in the order
	 * they are printed, so read them byte-wise; this is independent of
	 * the host's endianness. */
	const uint8_t *octet = (const uint8_t *)&be_ipv4_addr;

	/* Arguments are explicitly unsigned to match the %u conversions. */
	snprintf(buf, buf_size, "%u.%u.%u.%u",
		(unsigned int)octet[0], (unsigned int)octet[1],
		(unsigned int)octet[2], (unsigned int)octet[3]);
}
531
532 #define MAX_CLIENTS_NUMBER      128
533 uint8_t active_clients;
534 struct client_stats_t {
535         uint16_t port;
536         uint32_t ipv4_addr;
537         uint32_t ipv4_rx_packets;
538         uint32_t ipv4_tx_packets;
539 };
540 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
541
542 static void
543 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
544 {
545         int i = 0;
546
547         for (; i < MAX_CLIENTS_NUMBER; i++)     {
548                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
549                         /* Just update RX packets number for this client */
550                         if (TXorRXindicator == &burstnumberRX)
551                                 client_stats[i].ipv4_rx_packets++;
552                         else
553                                 client_stats[i].ipv4_tx_packets++;
554                         return;
555                 }
556         }
557         /* We have a new client. Insert him to the table, and increment stats */
558         if (TXorRXindicator == &burstnumberRX)
559                 client_stats[active_clients].ipv4_rx_packets++;
560         else
561                 client_stats[active_clients].ipv4_tx_packets++;
562         client_stats[active_clients].ipv4_addr = addr;
563         client_stats[active_clients].port = port;
564         active_clients++;
565
566 }
567
568 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
569 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
570         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
571                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
572                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
573                 info,                                                   \
574                 port,                                                   \
575                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
576                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
577                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
578                 src_ip,                                                 \
579                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
580                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
581                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
582                 dst_ip,                                                 \
583                 arp_op, ++burstnumber)
584 #endif
585
586 static void
587 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
588                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
589 {
590         struct ipv4_hdr *ipv4_h;
591 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
592         struct arp_hdr *arp_h;
593         char dst_ip[16];
594         char ArpOp[24];
595         char buf[16];
596 #endif
597         char src_ip[16];
598
599         uint16_t ether_type = eth_h->ether_type;
600         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
601
602 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
603         strlcpy(buf, info, 16);
604 #endif
605
606         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
607                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
608                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
611                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
612 #endif
613                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
614         }
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
617                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
618                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
619                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
620                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
621                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
622         }
623 #endif
624 }
625 #endif
626
627 static uint16_t
628 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
629 {
630         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
631         struct bond_dev_private *internals = bd_tx_q->dev_private;
632         struct ether_hdr *eth_h;
633         uint16_t ether_type, offset;
634         uint16_t nb_recv_pkts;
635         int i;
636
637         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
638
639         for (i = 0; i < nb_recv_pkts; i++) {
640                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
641                 ether_type = eth_h->ether_type;
642                 offset = get_vlan_offset(eth_h, &ether_type);
643
644                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
645 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
646                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
647 #endif
648                         bond_mode_alb_arp_recv(eth_h, offset, internals);
649                 }
650 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
651                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
652                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
653 #endif
654         }
655
656         return nb_recv_pkts;
657 }
658
659 static uint16_t
660 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
661                 uint16_t nb_pkts)
662 {
663         struct bond_dev_private *internals;
664         struct bond_tx_queue *bd_tx_q;
665
666         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
667         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
668
669         uint16_t num_of_slaves;
670         uint16_t slaves[RTE_MAX_ETHPORTS];
671
672         uint16_t num_tx_total = 0, num_tx_slave;
673
674         static int slave_idx = 0;
675         int i, cslave_idx = 0, tx_fail_total = 0;
676
677         bd_tx_q = (struct bond_tx_queue *)queue;
678         internals = bd_tx_q->dev_private;
679
680         /* Copy slave list to protect against slave up/down changes during tx
681          * bursting */
682         num_of_slaves = internals->active_slave_count;
683         memcpy(slaves, internals->active_slaves,
684                         sizeof(internals->active_slaves[0]) * num_of_slaves);
685
686         if (num_of_slaves < 1)
687                 return num_tx_total;
688
689         /* Populate slaves mbuf with which packets are to be sent on it  */
690         for (i = 0; i < nb_pkts; i++) {
691                 cslave_idx = (slave_idx + i) % num_of_slaves;
692                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
693         }
694
695         /* increment current slave index so the next call to tx burst starts on the
696          * next slave */
697         slave_idx = ++cslave_idx;
698
699         /* Send packet burst on each slave device */
700         for (i = 0; i < num_of_slaves; i++) {
701                 if (slave_nb_pkts[i] > 0) {
702                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
703                                         slave_bufs[i], slave_nb_pkts[i]);
704
705                         /* if tx burst fails move packets to end of bufs */
706                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
707                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
708
709                                 tx_fail_total += tx_fail_slave;
710
711                                 memcpy(&bufs[nb_pkts - tx_fail_total],
712                                        &slave_bufs[i][num_tx_slave],
713                                        tx_fail_slave * sizeof(bufs[0]));
714                         }
715                         num_tx_total += num_tx_slave;
716                 }
717         }
718
719         return num_tx_total;
720 }
721
722 static uint16_t
723 bond_ethdev_tx_burst_active_backup(void *queue,
724                 struct rte_mbuf **bufs, uint16_t nb_pkts)
725 {
726         struct bond_dev_private *internals;
727         struct bond_tx_queue *bd_tx_q;
728
729         bd_tx_q = (struct bond_tx_queue *)queue;
730         internals = bd_tx_q->dev_private;
731
732         if (internals->active_slave_count < 1)
733                 return 0;
734
735         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
736                         bufs, nb_pkts);
737 }
738
739 static inline uint16_t
740 ether_hash(struct ether_hdr *eth_hdr)
741 {
742         unaligned_uint16_t *word_src_addr =
743                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
744         unaligned_uint16_t *word_dst_addr =
745                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
746
747         return (word_src_addr[0] ^ word_dst_addr[0]) ^
748                         (word_src_addr[1] ^ word_dst_addr[1]) ^
749                         (word_src_addr[2] ^ word_dst_addr[2]);
750 }
751
752 static inline uint32_t
753 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
754 {
755         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
756 }
757
758 static inline uint32_t
759 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
760 {
761         unaligned_uint32_t *word_src_addr =
762                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
763         unaligned_uint32_t *word_dst_addr =
764                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
765
766         return (word_src_addr[0] ^ word_dst_addr[0]) ^
767                         (word_src_addr[1] ^ word_dst_addr[1]) ^
768                         (word_src_addr[2] ^ word_dst_addr[2]) ^
769                         (word_src_addr[3] ^ word_dst_addr[3]);
770 }
771
772
773 void
774 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
775                 uint8_t slave_count, uint16_t *slaves)
776 {
777         struct ether_hdr *eth_hdr;
778         uint32_t hash;
779         int i;
780
781         for (i = 0; i < nb_pkts; i++) {
782                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
783
784                 hash = ether_hash(eth_hdr);
785
786                 slaves[i] = (hash ^= hash >> 8) % slave_count;
787         }
788 }
789
790 void
791 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
792                 uint8_t slave_count, uint16_t *slaves)
793 {
794         uint16_t i;
795         struct ether_hdr *eth_hdr;
796         uint16_t proto;
797         size_t vlan_offset;
798         uint32_t hash, l3hash;
799
800         for (i = 0; i < nb_pkts; i++) {
801                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
802                 l3hash = 0;
803
804                 proto = eth_hdr->ether_type;
805                 hash = ether_hash(eth_hdr);
806
807                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
808
809                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
810                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
811                                         ((char *)(eth_hdr + 1) + vlan_offset);
812                         l3hash = ipv4_hash(ipv4_hdr);
813
814                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
815                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
816                                         ((char *)(eth_hdr + 1) + vlan_offset);
817                         l3hash = ipv6_hash(ipv6_hdr);
818                 }
819
820                 hash = hash ^ l3hash;
821                 hash ^= hash >> 16;
822                 hash ^= hash >> 8;
823
824                 slaves[i] = hash % slave_count;
825         }
826 }
827
828 void
829 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
830                 uint8_t slave_count, uint16_t *slaves)
831 {
832         struct ether_hdr *eth_hdr;
833         uint16_t proto;
834         size_t vlan_offset;
835         int i;
836
837         struct udp_hdr *udp_hdr;
838         struct tcp_hdr *tcp_hdr;
839         uint32_t hash, l3hash, l4hash;
840
841         for (i = 0; i < nb_pkts; i++) {
842                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
843                 proto = eth_hdr->ether_type;
844                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
845                 l3hash = 0;
846                 l4hash = 0;
847
848                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
849                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
850                                         ((char *)(eth_hdr + 1) + vlan_offset);
851                         size_t ip_hdr_offset;
852
853                         l3hash = ipv4_hash(ipv4_hdr);
854
855                         /* there is no L4 header in fragmented packet */
856                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
857                                                                 == 0)) {
858                                 ip_hdr_offset = (ipv4_hdr->version_ihl
859                                         & IPV4_HDR_IHL_MASK) *
860                                         IPV4_IHL_MULTIPLIER;
861
862                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
863                                         tcp_hdr = (struct tcp_hdr *)
864                                                 ((char *)ipv4_hdr +
865                                                         ip_hdr_offset);
866                                         l4hash = HASH_L4_PORTS(tcp_hdr);
867                                 } else if (ipv4_hdr->next_proto_id ==
868                                                                 IPPROTO_UDP) {
869                                         udp_hdr = (struct udp_hdr *)
870                                                 ((char *)ipv4_hdr +
871                                                         ip_hdr_offset);
872                                         l4hash = HASH_L4_PORTS(udp_hdr);
873                                 }
874                         }
875                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
876                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
877                                         ((char *)(eth_hdr + 1) + vlan_offset);
878                         l3hash = ipv6_hash(ipv6_hdr);
879
880                         if (ipv6_hdr->proto == IPPROTO_TCP) {
881                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
882                                 l4hash = HASH_L4_PORTS(tcp_hdr);
883                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
884                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
885                                 l4hash = HASH_L4_PORTS(udp_hdr);
886                         }
887                 }
888
889                 hash = l3hash ^ l4hash;
890                 hash ^= hash >> 16;
891                 hash ^= hash >> 8;
892
893                 slaves[i] = hash % slave_count;
894         }
895 }
896
/*
 * Per-slave "bandwidth left" record used to order slaves in TLB mode.
 * The two bwg_left_* fields hold the quotient and remainder computed by
 * bandwidth_left(); slave is the port id the record belongs to.
 */
struct bwg_slave {
	uint64_t bwg_left_int;		/* quotient part of bandwidth left */
	uint64_t bwg_left_remainder;	/* remainder part of bandwidth left */
	uint16_t slave;			/* port id; 16 bits so ids > 255 fit */
};
902
903 void
904 bond_tlb_activate_slave(struct bond_dev_private *internals) {
905         int i;
906
907         for (i = 0; i < internals->active_slave_count; i++) {
908                 tlb_last_obytets[internals->active_slaves[i]] = 0;
909         }
910 }
911
912 static int
913 bandwidth_cmp(const void *a, const void *b)
914 {
915         const struct bwg_slave *bwg_a = a;
916         const struct bwg_slave *bwg_b = b;
917         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
918         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
919                         (int64_t)bwg_a->bwg_left_remainder;
920         if (diff > 0)
921                 return 1;
922         else if (diff < 0)
923                 return -1;
924         else if (diff2 > 0)
925                 return 1;
926         else if (diff2 < 0)
927                 return -1;
928         else
929                 return 0;
930 }
931
932 static void
933 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
934                 struct bwg_slave *bwg_slave)
935 {
936         struct rte_eth_link link_status;
937
938         rte_eth_link_get_nowait(port_id, &link_status);
939         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
940         if (link_bwg == 0)
941                 return;
942         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
943         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
944         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
945 }
946
947 static void
948 bond_ethdev_update_tlb_slave_cb(void *arg)
949 {
950         struct bond_dev_private *internals = arg;
951         struct rte_eth_stats slave_stats;
952         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
953         uint8_t slave_count;
954         uint64_t tx_bytes;
955
956         uint8_t update_stats = 0;
957         uint8_t i, slave_id;
958
959         internals->slave_update_idx++;
960
961
962         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
963                 update_stats = 1;
964
965         for (i = 0; i < internals->active_slave_count; i++) {
966                 slave_id = internals->active_slaves[i];
967                 rte_eth_stats_get(slave_id, &slave_stats);
968                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
969                 bandwidth_left(slave_id, tx_bytes,
970                                 internals->slave_update_idx, &bwg_array[i]);
971                 bwg_array[i].slave = slave_id;
972
973                 if (update_stats) {
974                         tlb_last_obytets[slave_id] = slave_stats.obytes;
975                 }
976         }
977
978         if (update_stats == 1)
979                 internals->slave_update_idx = 0;
980
981         slave_count = i;
982         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
983         for (i = 0; i < slave_count; i++)
984                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
985
986         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
987                         (struct bond_dev_private *)internals);
988 }
989
990 static uint16_t
991 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
992 {
993         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
994         struct bond_dev_private *internals = bd_tx_q->dev_private;
995
996         struct rte_eth_dev *primary_port =
997                         &rte_eth_devices[internals->primary_port];
998         uint16_t num_tx_total = 0;
999         uint16_t i, j;
1000
1001         uint16_t num_of_slaves = internals->active_slave_count;
1002         uint16_t slaves[RTE_MAX_ETHPORTS];
1003
1004         struct ether_hdr *ether_hdr;
1005         struct ether_addr primary_slave_addr;
1006         struct ether_addr active_slave_addr;
1007
1008         if (num_of_slaves < 1)
1009                 return num_tx_total;
1010
1011         memcpy(slaves, internals->tlb_slaves_order,
1012                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1013
1014
1015         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1016
1017         if (nb_pkts > 3) {
1018                 for (i = 0; i < 3; i++)
1019                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1020         }
1021
1022         for (i = 0; i < num_of_slaves; i++) {
1023                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1024                 for (j = num_tx_total; j < nb_pkts; j++) {
1025                         if (j + 3 < nb_pkts)
1026                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1027
1028                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1029                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1030                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1031 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1032                                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1033 #endif
1034                 }
1035
1036                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1037                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1038
1039                 if (num_tx_total == nb_pkts)
1040                         break;
1041         }
1042
1043         return num_tx_total;
1044 }
1045
1046 void
1047 bond_tlb_disable(struct bond_dev_private *internals)
1048 {
1049         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1050 }
1051
/*
 * Start TLB slave re-ordering by running the update callback once; the
 * callback re-arms its own alarm.
 */
void
bond_tlb_enable(struct bond_dev_private *internals)
{
	void *cb_arg = internals;

	bond_ethdev_update_tlb_slave_cb(cb_arg);
}
1057
1058 static uint16_t
1059 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1060 {
1061         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1062         struct bond_dev_private *internals = bd_tx_q->dev_private;
1063
1064         struct ether_hdr *eth_h;
1065         uint16_t ether_type, offset;
1066
1067         struct client_data *client_info;
1068
1069         /*
1070          * We create transmit buffers for every slave and one additional to send
1071          * through tlb. In worst case every packet will be send on one port.
1072          */
1073         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1074         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1075
1076         /*
1077          * We create separate transmit buffers for update packets as they won't
1078          * be counted in num_tx_total.
1079          */
1080         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1081         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1082
1083         struct rte_mbuf *upd_pkt;
1084         size_t pkt_size;
1085
1086         uint16_t num_send, num_not_send = 0;
1087         uint16_t num_tx_total = 0;
1088         uint16_t slave_idx;
1089
1090         int i, j;
1091
1092         /* Search tx buffer for ARP packets and forward them to alb */
1093         for (i = 0; i < nb_pkts; i++) {
1094                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1095                 ether_type = eth_h->ether_type;
1096                 offset = get_vlan_offset(eth_h, &ether_type);
1097
1098                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1099                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1100
1101                         /* Change src mac in eth header */
1102                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1103
1104                         /* Add packet to slave tx buffer */
1105                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1106                         slave_bufs_pkts[slave_idx]++;
1107                 } else {
1108                         /* If packet is not ARP, send it with TLB policy */
1109                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1110                                         bufs[i];
1111                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1112                 }
1113         }
1114
1115         /* Update connected client ARP tables */
1116         if (internals->mode6.ntt) {
1117                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1118                         client_info = &internals->mode6.client_table[i];
1119
1120                         if (client_info->in_use) {
1121                                 /* Allocate new packet to send ARP update on current slave */
1122                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1123                                 if (upd_pkt == NULL) {
1124                                         RTE_BOND_LOG(ERR,
1125                                                      "Failed to allocate ARP packet from pool");
1126                                         continue;
1127                                 }
1128                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1129                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1130                                 upd_pkt->data_len = pkt_size;
1131                                 upd_pkt->pkt_len = pkt_size;
1132
1133                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1134                                                 internals);
1135
1136                                 /* Add packet to update tx buffer */
1137                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1138                                 update_bufs_pkts[slave_idx]++;
1139                         }
1140                 }
1141                 internals->mode6.ntt = 0;
1142         }
1143
1144         /* Send ARP packets on proper slaves */
1145         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1146                 if (slave_bufs_pkts[i] > 0) {
1147                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1148                                         slave_bufs[i], slave_bufs_pkts[i]);
1149                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1150                                 bufs[nb_pkts - 1 - num_not_send - j] =
1151                                                 slave_bufs[i][nb_pkts - 1 - j];
1152                         }
1153
1154                         num_tx_total += num_send;
1155                         num_not_send += slave_bufs_pkts[i] - num_send;
1156
1157 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1158         /* Print TX stats including update packets */
1159                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1160                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1161                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1162                         }
1163 #endif
1164                 }
1165         }
1166
1167         /* Send update packets on proper slaves */
1168         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1169                 if (update_bufs_pkts[i] > 0) {
1170                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1171                                         update_bufs_pkts[i]);
1172                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1173                                 rte_pktmbuf_free(update_bufs[i][j]);
1174                         }
1175 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1176                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1177                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1178                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1179                         }
1180 #endif
1181                 }
1182         }
1183
1184         /* Send non-ARP packets using tlb policy */
1185         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1186                 num_send = bond_ethdev_tx_burst_tlb(queue,
1187                                 slave_bufs[RTE_MAX_ETHPORTS],
1188                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1189
1190                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1191                         bufs[nb_pkts - 1 - num_not_send - j] =
1192                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1193                 }
1194
1195                 num_tx_total += num_send;
1196         }
1197
1198         return num_tx_total;
1199 }
1200
1201 static uint16_t
1202 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1203                 uint16_t nb_bufs)
1204 {
1205         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1206         struct bond_dev_private *internals = bd_tx_q->dev_private;
1207
1208         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1209         uint16_t slave_count;
1210
1211         /* Array to sort mbufs for transmission on each slave into */
1212         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1213         /* Number of mbufs for transmission on each slave */
1214         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1215         /* Mapping array generated by hash function to map mbufs to slaves */
1216         uint16_t bufs_slave_port_idxs[nb_bufs];
1217
1218         uint16_t slave_tx_count;
1219         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1220
1221         uint16_t i;
1222
1223         if (unlikely(nb_bufs == 0))
1224                 return 0;
1225
1226         /* Copy slave list to protect against slave up/down changes during tx
1227          * bursting */
1228         slave_count = internals->active_slave_count;
1229         if (unlikely(slave_count < 1))
1230                 return 0;
1231
1232         memcpy(slave_port_ids, internals->active_slaves,
1233                         sizeof(slave_port_ids[0]) * slave_count);
1234
1235         /*
1236          * Populate slaves mbuf with the packets which are to be sent on it
1237          * selecting output slave using hash based on xmit policy
1238          */
1239         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1240                         bufs_slave_port_idxs);
1241
1242         for (i = 0; i < nb_bufs; i++) {
1243                 /* Populate slave mbuf arrays with mbufs for that slave. */
1244                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1245
1246                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1247         }
1248
1249         /* Send packet burst on each slave device */
1250         for (i = 0; i < slave_count; i++) {
1251                 if (slave_nb_bufs[i] == 0)
1252                         continue;
1253
1254                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1255                                 bd_tx_q->queue_id, slave_bufs[i],
1256                                 slave_nb_bufs[i]);
1257
1258                 total_tx_count += slave_tx_count;
1259
1260                 /* If tx burst fails move packets to end of bufs */
1261                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1262                         int slave_tx_fail_count = slave_nb_bufs[i] -
1263                                         slave_tx_count;
1264                         total_tx_fail_count += slave_tx_fail_count;
1265                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
1266                                &slave_bufs[i][slave_tx_count],
1267                                slave_tx_fail_count * sizeof(bufs[0]));
1268                 }
1269         }
1270
1271         return total_tx_count;
1272 }
1273
1274 static uint16_t
1275 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1276                 uint16_t nb_bufs)
1277 {
1278         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1279         struct bond_dev_private *internals = bd_tx_q->dev_private;
1280
1281         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1282         uint16_t slave_count;
1283
1284         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1285         uint16_t dist_slave_count;
1286
1287         /* 2-D array to sort mbufs for transmission on each slave into */
1288         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1289         /* Number of mbufs for transmission on each slave */
1290         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1291         /* Mapping array generated by hash function to map mbufs to slaves */
1292         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1293
1294         uint16_t slave_tx_count;
1295         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1296
1297         uint16_t i;
1298
1299         if (unlikely(nb_bufs == 0))
1300                 return 0;
1301
1302         /* Copy slave list to protect against slave up/down changes during tx
1303          * bursting */
1304         slave_count = internals->active_slave_count;
1305         if (unlikely(slave_count < 1))
1306                 return 0;
1307
1308         memcpy(slave_port_ids, internals->active_slaves,
1309                         sizeof(slave_port_ids[0]) * slave_count);
1310
1311         dist_slave_count = 0;
1312         for (i = 0; i < slave_count; i++) {
1313                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1314
1315                 if (ACTOR_STATE(port, DISTRIBUTING))
1316                         dist_slave_port_ids[dist_slave_count++] =
1317                                         slave_port_ids[i];
1318         }
1319
1320         if (likely(dist_slave_count > 1)) {
1321
1322                 /*
1323                  * Populate slaves mbuf with the packets which are to be sent
1324                  * on it, selecting output slave using hash based on xmit policy
1325                  */
1326                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1327                                 bufs_slave_port_idxs);
1328
1329                 for (i = 0; i < nb_bufs; i++) {
1330                         /*
1331                          * Populate slave mbuf arrays with mbufs for that
1332                          * slave
1333                          */
1334                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1335
1336                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1337                                         bufs[i];
1338                 }
1339
1340
1341                 /* Send packet burst on each slave device */
1342                 for (i = 0; i < dist_slave_count; i++) {
1343                         if (slave_nb_bufs[i] == 0)
1344                                 continue;
1345
1346                         slave_tx_count = rte_eth_tx_burst(
1347                                         dist_slave_port_ids[i],
1348                                         bd_tx_q->queue_id, slave_bufs[i],
1349                                         slave_nb_bufs[i]);
1350
1351                         total_tx_count += slave_tx_count;
1352
1353                         /* If tx burst fails move packets to end of bufs */
1354                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1355                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1356                                                 slave_tx_count;
1357                                 total_tx_fail_count += slave_tx_fail_count;
1358
1359                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1360                                        &slave_bufs[i][slave_tx_count],
1361                                        slave_tx_fail_count * sizeof(bufs[0]));
1362                         }
1363                 }
1364         }
1365
1366         /* Check for LACP control packets and send if available */
1367         for (i = 0; i < slave_count; i++) {
1368                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1369                 struct rte_mbuf *ctrl_pkt = NULL;
1370
1371                 if (likely(rte_ring_empty(port->tx_ring)))
1372                         continue;
1373
1374                 if (rte_ring_dequeue(port->tx_ring,
1375                                      (void **)&ctrl_pkt) != -ENOENT) {
1376                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1377                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1378                         /*
1379                          * re-enqueue LAG control plane packets to buffering
1380                          * ring if transmission fails so the packet isn't lost.
1381                          */
1382                         if (slave_tx_count != 1)
1383                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1384                 }
1385         }
1386
1387         return total_tx_count;
1388 }
1389
1390 static uint16_t
1391 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1392                 uint16_t nb_pkts)
1393 {
1394         struct bond_dev_private *internals;
1395         struct bond_tx_queue *bd_tx_q;
1396
1397         uint8_t tx_failed_flag = 0, num_of_slaves;
1398         uint16_t slaves[RTE_MAX_ETHPORTS];
1399
1400         uint16_t max_nb_of_tx_pkts = 0;
1401
1402         int slave_tx_total[RTE_MAX_ETHPORTS];
1403         int i, most_successful_tx_slave = -1;
1404
1405         bd_tx_q = (struct bond_tx_queue *)queue;
1406         internals = bd_tx_q->dev_private;
1407
1408         /* Copy slave list to protect against slave up/down changes during tx
1409          * bursting */
1410         num_of_slaves = internals->active_slave_count;
1411         memcpy(slaves, internals->active_slaves,
1412                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1413
1414         if (num_of_slaves < 1)
1415                 return 0;
1416
1417         /* Increment reference count on mbufs */
1418         for (i = 0; i < nb_pkts; i++)
1419                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1420
1421         /* Transmit burst on each active slave */
1422         for (i = 0; i < num_of_slaves; i++) {
1423                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1424                                         bufs, nb_pkts);
1425
1426                 if (unlikely(slave_tx_total[i] < nb_pkts))
1427                         tx_failed_flag = 1;
1428
1429                 /* record the value and slave index for the slave which transmits the
1430                  * maximum number of packets */
1431                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1432                         max_nb_of_tx_pkts = slave_tx_total[i];
1433                         most_successful_tx_slave = i;
1434                 }
1435         }
1436
1437         /* if slaves fail to transmit packets from burst, the calling application
1438          * is not expected to know about multiple references to packets so we must
1439          * handle failures of all packets except those of the most successful slave
1440          */
1441         if (unlikely(tx_failed_flag))
1442                 for (i = 0; i < num_of_slaves; i++)
1443                         if (i != most_successful_tx_slave)
1444                                 while (slave_tx_total[i] < nb_pkts)
1445                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1446
1447         return max_nb_of_tx_pkts;
1448 }
1449
1450 void
1451 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1452 {
1453         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1454
1455         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1456                 /**
1457                  * If in mode 4 then save the link properties of the first
1458                  * slave, all subsequent slaves must match these properties
1459                  */
1460                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1461
1462                 bond_link->link_autoneg = slave_link->link_autoneg;
1463                 bond_link->link_duplex = slave_link->link_duplex;
1464                 bond_link->link_speed = slave_link->link_speed;
1465         } else {
1466                 /**
1467                  * In any other mode the link properties are set to default
1468                  * values of AUTONEG/DUPLEX
1469                  */
1470                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1471                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1472         }
1473 }
1474
1475 int
1476 link_properties_valid(struct rte_eth_dev *ethdev,
1477                 struct rte_eth_link *slave_link)
1478 {
1479         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1480
1481         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1482                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1483
1484                 if (bond_link->link_duplex != slave_link->link_duplex ||
1485                         bond_link->link_autoneg != slave_link->link_autoneg ||
1486                         bond_link->link_speed != slave_link->link_speed)
1487                         return -1;
1488         }
1489
1490         return 0;
1491 }
1492
1493 int
1494 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1495 {
1496         struct ether_addr *mac_addr;
1497
1498         if (eth_dev == NULL) {
1499                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1500                 return -1;
1501         }
1502
1503         if (dst_mac_addr == NULL) {
1504                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1505                 return -1;
1506         }
1507
1508         mac_addr = eth_dev->data->mac_addrs;
1509
1510         ether_addr_copy(mac_addr, dst_mac_addr);
1511         return 0;
1512 }
1513
1514 int
1515 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1516 {
1517         struct ether_addr *mac_addr;
1518
1519         if (eth_dev == NULL) {
1520                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1521                 return -1;
1522         }
1523
1524         if (new_mac_addr == NULL) {
1525                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1526                 return -1;
1527         }
1528
1529         mac_addr = eth_dev->data->mac_addrs;
1530
1531         /* If new MAC is different to current MAC then update */
1532         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1533                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1534
1535         return 0;
1536 }
1537
1538 static const struct ether_addr null_mac_addr;
1539
1540 /*
1541  * Add additional MAC addresses to the slave
1542  */
1543 int
1544 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1545                 uint16_t slave_port_id)
1546 {
1547         int i, ret;
1548         struct ether_addr *mac_addr;
1549
1550         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1551                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1552                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1553                         break;
1554
1555                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1556                 if (ret < 0) {
1557                         /* rollback */
1558                         for (i--; i > 0; i--)
1559                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1560                                         &bonded_eth_dev->data->mac_addrs[i]);
1561                         return ret;
1562                 }
1563         }
1564
1565         return 0;
1566 }
1567
1568 /*
1569  * Remove additional MAC addresses from the slave
1570  */
1571 int
1572 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1573                 uint16_t slave_port_id)
1574 {
1575         int i, rc, ret;
1576         struct ether_addr *mac_addr;
1577
1578         rc = 0;
1579         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1580                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1581                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1582                         break;
1583
1584                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1585                 /* save only the first error */
1586                 if (ret < 0 && rc == 0)
1587                         rc = ret;
1588         }
1589
1590         return rc;
1591 }
1592
1593 int
1594 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1595 {
1596         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1597         int i;
1598
1599         /* Update slave devices MAC addresses */
1600         if (internals->slave_count < 1)
1601                 return -1;
1602
1603         switch (internals->mode) {
1604         case BONDING_MODE_ROUND_ROBIN:
1605         case BONDING_MODE_BALANCE:
1606         case BONDING_MODE_BROADCAST:
1607                 for (i = 0; i < internals->slave_count; i++) {
1608                         if (rte_eth_dev_default_mac_addr_set(
1609                                         internals->slaves[i].port_id,
1610                                         bonded_eth_dev->data->mac_addrs)) {
1611                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1612                                                 internals->slaves[i].port_id);
1613                                 return -1;
1614                         }
1615                 }
1616                 break;
1617         case BONDING_MODE_8023AD:
1618                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1619                 break;
1620         case BONDING_MODE_ACTIVE_BACKUP:
1621         case BONDING_MODE_TLB:
1622         case BONDING_MODE_ALB:
1623         default:
1624                 for (i = 0; i < internals->slave_count; i++) {
1625                         if (internals->slaves[i].port_id ==
1626                                         internals->current_primary_port) {
1627                                 if (rte_eth_dev_default_mac_addr_set(
1628                                                 internals->primary_port,
1629                                                 bonded_eth_dev->data->mac_addrs)) {
1630                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1631                                                         internals->current_primary_port);
1632                                         return -1;
1633                                 }
1634                         } else {
1635                                 if (rte_eth_dev_default_mac_addr_set(
1636                                                 internals->slaves[i].port_id,
1637                                                 &internals->slaves[i].persisted_mac_addr)) {
1638                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1639                                                         internals->slaves[i].port_id);
1640                                         return -1;
1641                                 }
1642                         }
1643                 }
1644         }
1645
1646         return 0;
1647 }
1648
1649 int
1650 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1651 {
1652         struct bond_dev_private *internals;
1653
1654         internals = eth_dev->data->dev_private;
1655
1656         switch (mode) {
1657         case BONDING_MODE_ROUND_ROBIN:
1658                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1659                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1660                 break;
1661         case BONDING_MODE_ACTIVE_BACKUP:
1662                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1663                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1664                 break;
1665         case BONDING_MODE_BALANCE:
1666                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1667                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1668                 break;
1669         case BONDING_MODE_BROADCAST:
1670                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1671                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1672                 break;
1673         case BONDING_MODE_8023AD:
1674                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1675                         return -1;
1676
1677                 if (internals->mode4.dedicated_queues.enabled == 0) {
1678                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1679                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1680                         RTE_BOND_LOG(WARNING,
1681                                 "Using mode 4, it is necessary to do TX burst "
1682                                 "and RX burst at least every 100ms.");
1683                 } else {
1684                         /* Use flow director's optimization */
1685                         eth_dev->rx_pkt_burst =
1686                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1687                         eth_dev->tx_pkt_burst =
1688                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1689                 }
1690                 break;
1691         case BONDING_MODE_TLB:
1692                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1693                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1694                 break;
1695         case BONDING_MODE_ALB:
1696                 if (bond_mode_alb_enable(eth_dev) != 0)
1697                         return -1;
1698
1699                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1700                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1701                 break;
1702         default:
1703                 return -1;
1704         }
1705
1706         internals->mode = mode;
1707
1708         return 0;
1709 }
1710
1711
1712 static int
1713 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1714                 struct rte_eth_dev *slave_eth_dev)
1715 {
1716         int errval = 0;
1717         struct bond_dev_private *internals = (struct bond_dev_private *)
1718                 bonded_eth_dev->data->dev_private;
1719         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1720
1721         if (port->slow_pool == NULL) {
1722                 char mem_name[256];
1723                 int slave_id = slave_eth_dev->data->port_id;
1724
1725                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1726                                 slave_id);
1727                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1728                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1729                         slave_eth_dev->data->numa_node);
1730
1731                 /* Any memory allocation failure in initialization is critical because
1732                  * resources can't be free, so reinitialization is impossible. */
1733                 if (port->slow_pool == NULL) {
1734                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1735                                 slave_id, mem_name, rte_strerror(rte_errno));
1736                 }
1737         }
1738
1739         if (internals->mode4.dedicated_queues.enabled == 1) {
1740                 /* Configure slow Rx queue */
1741
1742                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1743                                 internals->mode4.dedicated_queues.rx_qid, 128,
1744                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1745                                 NULL, port->slow_pool);
1746                 if (errval != 0) {
1747                         RTE_BOND_LOG(ERR,
1748                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1749                                         slave_eth_dev->data->port_id,
1750                                         internals->mode4.dedicated_queues.rx_qid,
1751                                         errval);
1752                         return errval;
1753                 }
1754
1755                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1756                                 internals->mode4.dedicated_queues.tx_qid, 512,
1757                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1758                                 NULL);
1759                 if (errval != 0) {
1760                         RTE_BOND_LOG(ERR,
1761                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1762                                 slave_eth_dev->data->port_id,
1763                                 internals->mode4.dedicated_queues.tx_qid,
1764                                 errval);
1765                         return errval;
1766                 }
1767         }
1768         return 0;
1769 }
1770
1771 int
1772 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1773                 struct rte_eth_dev *slave_eth_dev)
1774 {
1775         struct bond_rx_queue *bd_rx_q;
1776         struct bond_tx_queue *bd_tx_q;
1777         uint16_t nb_rx_queues;
1778         uint16_t nb_tx_queues;
1779
1780         int errval;
1781         uint16_t q_id;
1782         struct rte_flow_error flow_error;
1783
1784         struct bond_dev_private *internals = (struct bond_dev_private *)
1785                 bonded_eth_dev->data->dev_private;
1786
1787         /* Stop slave */
1788         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1789
1790         /* Enable interrupts on slave device if supported */
1791         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1792                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1793
1794         /* If RSS is enabled for bonding, try to enable it for slaves  */
1795         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1796                 if (internals->rss_key_len != 0) {
1797                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1798                                         internals->rss_key_len;
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1800                                         internals->rss_key;
1801                 } else {
1802                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1803                 }
1804
1805                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1806                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1807                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1808                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1809         }
1810
1811         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1812                         DEV_RX_OFFLOAD_VLAN_FILTER)
1813                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1814                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1815         else
1816                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1817                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1818
1819         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1820         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1821
1822         if (internals->mode == BONDING_MODE_8023AD) {
1823                 if (internals->mode4.dedicated_queues.enabled == 1) {
1824                         nb_rx_queues++;
1825                         nb_tx_queues++;
1826                 }
1827         }
1828
1829         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1830                                      bonded_eth_dev->data->mtu);
1831         if (errval != 0 && errval != -ENOTSUP) {
1832                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1833                                 slave_eth_dev->data->port_id, errval);
1834                 return errval;
1835         }
1836
1837         /* Configure device */
1838         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1839                         nb_rx_queues, nb_tx_queues,
1840                         &(slave_eth_dev->data->dev_conf));
1841         if (errval != 0) {
1842                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1843                                 slave_eth_dev->data->port_id, errval);
1844                 return errval;
1845         }
1846
1847         /* Setup Rx Queues */
1848         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1849                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1850
1851                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1852                                 bd_rx_q->nb_rx_desc,
1853                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1854                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1855                 if (errval != 0) {
1856                         RTE_BOND_LOG(ERR,
1857                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1858                                         slave_eth_dev->data->port_id, q_id, errval);
1859                         return errval;
1860                 }
1861         }
1862
1863         /* Setup Tx Queues */
1864         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1865                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1866
1867                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1868                                 bd_tx_q->nb_tx_desc,
1869                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1870                                 &bd_tx_q->tx_conf);
1871                 if (errval != 0) {
1872                         RTE_BOND_LOG(ERR,
1873                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1874                                 slave_eth_dev->data->port_id, q_id, errval);
1875                         return errval;
1876                 }
1877         }
1878
1879         if (internals->mode == BONDING_MODE_8023AD &&
1880                         internals->mode4.dedicated_queues.enabled == 1) {
1881                 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1882                                 != 0)
1883                         return errval;
1884
1885                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1886                                 slave_eth_dev->data->port_id) != 0) {
1887                         RTE_BOND_LOG(ERR,
1888                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1889                                 slave_eth_dev->data->port_id, q_id, errval);
1890                         return -1;
1891                 }
1892
1893                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1894                         rte_flow_destroy(slave_eth_dev->data->port_id,
1895                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1896                                         &flow_error);
1897
1898                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1899                                 slave_eth_dev->data->port_id);
1900         }
1901
1902         /* Start device */
1903         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1904         if (errval != 0) {
1905                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1906                                 slave_eth_dev->data->port_id, errval);
1907                 return -1;
1908         }
1909
1910         /* If RSS is enabled for bonding, synchronize RETA */
1911         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1912                 int i;
1913                 struct bond_dev_private *internals;
1914
1915                 internals = bonded_eth_dev->data->dev_private;
1916
1917                 for (i = 0; i < internals->slave_count; i++) {
1918                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1919                                 errval = rte_eth_dev_rss_reta_update(
1920                                                 slave_eth_dev->data->port_id,
1921                                                 &internals->reta_conf[0],
1922                                                 internals->slaves[i].reta_size);
1923                                 if (errval != 0) {
1924                                         RTE_BOND_LOG(WARNING,
1925                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1926                                                      " RSS Configuration for bonding may be inconsistent.",
1927                                                      slave_eth_dev->data->port_id, errval);
1928                                 }
1929                                 break;
1930                         }
1931                 }
1932         }
1933
1934         /* If lsc interrupt is set, check initial slave's link status */
1935         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1936                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1937                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1938                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1939                         NULL);
1940         }
1941
1942         return 0;
1943 }
1944
1945 void
1946 slave_remove(struct bond_dev_private *internals,
1947                 struct rte_eth_dev *slave_eth_dev)
1948 {
1949         uint8_t i;
1950
1951         for (i = 0; i < internals->slave_count; i++)
1952                 if (internals->slaves[i].port_id ==
1953                                 slave_eth_dev->data->port_id)
1954                         break;
1955
1956         if (i < (internals->slave_count - 1)) {
1957                 struct rte_flow *flow;
1958
1959                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1960                                 sizeof(internals->slaves[0]) *
1961                                 (internals->slave_count - i - 1));
1962                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1963                         memmove(&flow->flows[i], &flow->flows[i + 1],
1964                                 sizeof(flow->flows[0]) *
1965                                 (internals->slave_count - i - 1));
1966                         flow->flows[internals->slave_count - 1] = NULL;
1967                 }
1968         }
1969
1970         internals->slave_count--;
1971
1972         /* force reconfiguration of slave interfaces */
1973         _rte_eth_dev_reset(slave_eth_dev);
1974 }
1975
1976 static void
1977 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1978
1979 void
1980 slave_add(struct bond_dev_private *internals,
1981                 struct rte_eth_dev *slave_eth_dev)
1982 {
1983         struct bond_slave_details *slave_details =
1984                         &internals->slaves[internals->slave_count];
1985
1986         slave_details->port_id = slave_eth_dev->data->port_id;
1987         slave_details->last_link_status = 0;
1988
1989         /* Mark slave devices that don't support interrupts so we can
1990          * compensate when we start the bond
1991          */
1992         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1993                 slave_details->link_status_poll_enabled = 1;
1994         }
1995
1996         slave_details->link_status_wait_to_complete = 0;
1997         /* clean tlb_last_obytes when adding port for bonding device */
1998         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1999                         sizeof(struct ether_addr));
2000 }
2001
2002 void
2003 bond_ethdev_primary_set(struct bond_dev_private *internals,
2004                 uint16_t slave_port_id)
2005 {
2006         int i;
2007
2008         if (internals->active_slave_count < 1)
2009                 internals->current_primary_port = slave_port_id;
2010         else
2011                 /* Search bonded device slave ports for new proposed primary port */
2012                 for (i = 0; i < internals->active_slave_count; i++) {
2013                         if (internals->active_slaves[i] == slave_port_id)
2014                                 internals->current_primary_port = slave_port_id;
2015                 }
2016 }
2017
2018 static void
2019 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2020
2021 static int
2022 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2023 {
2024         struct bond_dev_private *internals;
2025         int i;
2026
2027         /* slave eth dev will be started by bonded device */
2028         if (check_for_bonded_ethdev(eth_dev)) {
2029                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2030                                 eth_dev->data->port_id);
2031                 return -1;
2032         }
2033
2034         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2035         eth_dev->data->dev_started = 1;
2036
2037         internals = eth_dev->data->dev_private;
2038
2039         if (internals->slave_count == 0) {
2040                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2041                 goto out_err;
2042         }
2043
2044         if (internals->user_defined_mac == 0) {
2045                 struct ether_addr *new_mac_addr = NULL;
2046
2047                 for (i = 0; i < internals->slave_count; i++)
2048                         if (internals->slaves[i].port_id == internals->primary_port)
2049                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2050
2051                 if (new_mac_addr == NULL)
2052                         goto out_err;
2053
2054                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2055                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2056                                         eth_dev->data->port_id);
2057                         goto out_err;
2058                 }
2059         }
2060
2061         /* If bonded device is configure in promiscuous mode then re-apply config */
2062         if (internals->promiscuous_en)
2063                 bond_ethdev_promiscuous_enable(eth_dev);
2064
2065         if (internals->mode == BONDING_MODE_8023AD) {
2066                 if (internals->mode4.dedicated_queues.enabled == 1) {
2067                         internals->mode4.dedicated_queues.rx_qid =
2068                                         eth_dev->data->nb_rx_queues;
2069                         internals->mode4.dedicated_queues.tx_qid =
2070                                         eth_dev->data->nb_tx_queues;
2071                 }
2072         }
2073
2074
2075         /* Reconfigure each slave device if starting bonded device */
2076         for (i = 0; i < internals->slave_count; i++) {
2077                 struct rte_eth_dev *slave_ethdev =
2078                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2079                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2080                         RTE_BOND_LOG(ERR,
2081                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2082                                 eth_dev->data->port_id,
2083                                 internals->slaves[i].port_id);
2084                         goto out_err;
2085                 }
2086                 /* We will need to poll for link status if any slave doesn't
2087                  * support interrupts
2088                  */
2089                 if (internals->slaves[i].link_status_poll_enabled)
2090                         internals->link_status_polling_enabled = 1;
2091         }
2092
2093         /* start polling if needed */
2094         if (internals->link_status_polling_enabled) {
2095                 rte_eal_alarm_set(
2096                         internals->link_status_polling_interval_ms * 1000,
2097                         bond_ethdev_slave_link_status_change_monitor,
2098                         (void *)&rte_eth_devices[internals->port_id]);
2099         }
2100
2101         /* Update all slave devices MACs*/
2102         if (mac_address_slaves_update(eth_dev) != 0)
2103                 goto out_err;
2104
2105         if (internals->user_defined_primary_port)
2106                 bond_ethdev_primary_set(internals, internals->primary_port);
2107
2108         if (internals->mode == BONDING_MODE_8023AD)
2109                 bond_mode_8023ad_start(eth_dev);
2110
2111         if (internals->mode == BONDING_MODE_TLB ||
2112                         internals->mode == BONDING_MODE_ALB)
2113                 bond_tlb_enable(internals);
2114
2115         return 0;
2116
2117 out_err:
2118         eth_dev->data->dev_started = 0;
2119         return -1;
2120 }
2121
2122 static void
2123 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2124 {
2125         uint8_t i;
2126
2127         if (dev->data->rx_queues != NULL) {
2128                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2129                         rte_free(dev->data->rx_queues[i]);
2130                         dev->data->rx_queues[i] = NULL;
2131                 }
2132                 dev->data->nb_rx_queues = 0;
2133         }
2134
2135         if (dev->data->tx_queues != NULL) {
2136                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2137                         rte_free(dev->data->tx_queues[i]);
2138                         dev->data->tx_queues[i] = NULL;
2139                 }
2140                 dev->data->nb_tx_queues = 0;
2141         }
2142 }
2143
2144 void
2145 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2146 {
2147         struct bond_dev_private *internals = eth_dev->data->dev_private;
2148         uint8_t i;
2149
2150         if (internals->mode == BONDING_MODE_8023AD) {
2151                 struct port *port;
2152                 void *pkt = NULL;
2153
2154                 bond_mode_8023ad_stop(eth_dev);
2155
2156                 /* Discard all messages to/from mode 4 state machines */
2157                 for (i = 0; i < internals->active_slave_count; i++) {
2158                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2159
2160                         RTE_ASSERT(port->rx_ring != NULL);
2161                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2162                                 rte_pktmbuf_free(pkt);
2163
2164                         RTE_ASSERT(port->tx_ring != NULL);
2165                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2166                                 rte_pktmbuf_free(pkt);
2167                 }
2168         }
2169
2170         if (internals->mode == BONDING_MODE_TLB ||
2171                         internals->mode == BONDING_MODE_ALB) {
2172                 bond_tlb_disable(internals);
2173                 for (i = 0; i < internals->active_slave_count; i++)
2174                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2175         }
2176
2177         internals->link_status_polling_enabled = 0;
2178         for (i = 0; i < internals->slave_count; i++)
2179                 internals->slaves[i].last_link_status = 0;
2180
2181         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2182         eth_dev->data->dev_started = 0;
2183 }
2184
2185 void
2186 bond_ethdev_close(struct rte_eth_dev *dev)
2187 {
2188         struct bond_dev_private *internals = dev->data->dev_private;
2189         uint8_t bond_port_id = internals->port_id;
2190         int skipped = 0;
2191         struct rte_flow_error ferror;
2192
2193         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2194         while (internals->slave_count != skipped) {
2195                 uint16_t port_id = internals->slaves[skipped].port_id;
2196
2197                 rte_eth_dev_stop(port_id);
2198
2199                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2200                         RTE_BOND_LOG(ERR,
2201                                      "Failed to remove port %d from bonded device %s",
2202                                      port_id, dev->device->name);
2203                         skipped++;
2204                 }
2205         }
2206         bond_flow_ops.flush(dev, &ferror);
2207         bond_ethdev_free_queues(dev);
2208         rte_bitmap_reset(internals->vlan_filter_bmp);
2209 }
2210
2211 /* forward declaration */
2212 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2213
2214 static void
2215 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2216 {
2217         struct bond_dev_private *internals = dev->data->dev_private;
2218
2219         uint16_t max_nb_rx_queues = UINT16_MAX;
2220         uint16_t max_nb_tx_queues = UINT16_MAX;
2221
2222         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2223
2224         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2225                         internals->candidate_max_rx_pktlen :
2226                         ETHER_MAX_JUMBO_FRAME_LEN;
2227
2228         /* Max number of tx/rx queues that the bonded device can support is the
2229          * minimum values of the bonded slaves, as all slaves must be capable
2230          * of supporting the same number of tx/rx queues.
2231          */
2232         if (internals->slave_count > 0) {
2233                 struct rte_eth_dev_info slave_info;
2234                 uint8_t idx;
2235
2236                 for (idx = 0; idx < internals->slave_count; idx++) {
2237                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2238                                         &slave_info);
2239
2240                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2241                                 max_nb_rx_queues = slave_info.max_rx_queues;
2242
2243                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2244                                 max_nb_tx_queues = slave_info.max_tx_queues;
2245                 }
2246         }
2247
2248         dev_info->max_rx_queues = max_nb_rx_queues;
2249         dev_info->max_tx_queues = max_nb_tx_queues;
2250
2251         /**
2252          * If dedicated hw queues enabled for link bonding device in LACP mode
2253          * then we need to reduce the maximum number of data path queues by 1.
2254          */
2255         if (internals->mode == BONDING_MODE_8023AD &&
2256                 internals->mode4.dedicated_queues.enabled == 1) {
2257                 dev_info->max_rx_queues--;
2258                 dev_info->max_tx_queues--;
2259         }
2260
2261         dev_info->min_rx_bufsize = 0;
2262
2263         dev_info->rx_offload_capa = internals->rx_offload_capa;
2264         dev_info->tx_offload_capa = internals->tx_offload_capa;
2265         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2266         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2267         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2268
2269         dev_info->reta_size = internals->reta_size;
2270 }
2271
2272 static int
2273 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2274 {
2275         int res;
2276         uint16_t i;
2277         struct bond_dev_private *internals = dev->data->dev_private;
2278
2279         /* don't do this while a slave is being added */
2280         rte_spinlock_lock(&internals->lock);
2281
2282         if (on)
2283                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2284         else
2285                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2286
2287         for (i = 0; i < internals->slave_count; i++) {
2288                 uint16_t port_id = internals->slaves[i].port_id;
2289
2290                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2291                 if (res == ENOTSUP)
2292                         RTE_BOND_LOG(WARNING,
2293                                      "Setting VLAN filter on slave port %u not supported.",
2294                                      port_id);
2295         }
2296
2297         rte_spinlock_unlock(&internals->lock);
2298         return 0;
2299 }
2300
2301 static int
2302 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2303                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2304                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2305 {
2306         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2307                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2308                                         0, dev->data->numa_node);
2309         if (bd_rx_q == NULL)
2310                 return -1;
2311
2312         bd_rx_q->queue_id = rx_queue_id;
2313         bd_rx_q->dev_private = dev->data->dev_private;
2314
2315         bd_rx_q->nb_rx_desc = nb_rx_desc;
2316
2317         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2318         bd_rx_q->mb_pool = mb_pool;
2319
2320         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2321
2322         return 0;
2323 }
2324
2325 static int
2326 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2327                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2328                 const struct rte_eth_txconf *tx_conf)
2329 {
2330         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2331                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2332                                         0, dev->data->numa_node);
2333
2334         if (bd_tx_q == NULL)
2335                 return -1;
2336
2337         bd_tx_q->queue_id = tx_queue_id;
2338         bd_tx_q->dev_private = dev->data->dev_private;
2339
2340         bd_tx_q->nb_tx_desc = nb_tx_desc;
2341         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2342
2343         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2344
2345         return 0;
2346 }
2347
/* Free the descriptor of a bonded Rx queue; a NULL queue is ignored. */
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue != NULL)
		rte_free(queue);
}
2356
/* Free the descriptor of a bonded Tx queue; a NULL queue is ignored. */
static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue != NULL)
		rte_free(queue);
}
2365
2366 static void
2367 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2368 {
2369         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2370         struct bond_dev_private *internals;
2371
2372         /* Default value for polling slave found is true as we don't want to
2373          * disable the polling thread if we cannot get the lock */
2374         int i, polling_slave_found = 1;
2375
2376         if (cb_arg == NULL)
2377                 return;
2378
2379         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2380         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2381
2382         if (!bonded_ethdev->data->dev_started ||
2383                 !internals->link_status_polling_enabled)
2384                 return;
2385
2386         /* If device is currently being configured then don't check slaves link
2387          * status, wait until next period */
2388         if (rte_spinlock_trylock(&internals->lock)) {
2389                 if (internals->slave_count > 0)
2390                         polling_slave_found = 0;
2391
2392                 for (i = 0; i < internals->slave_count; i++) {
2393                         if (!internals->slaves[i].link_status_poll_enabled)
2394                                 continue;
2395
2396                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2397                         polling_slave_found = 1;
2398
2399                         /* Update slave link status */
2400                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2401                                         internals->slaves[i].link_status_wait_to_complete);
2402
2403                         /* if link status has changed since last checked then call lsc
2404                          * event callback */
2405                         if (slave_ethdev->data->dev_link.link_status !=
2406                                         internals->slaves[i].last_link_status) {
2407                                 internals->slaves[i].last_link_status =
2408                                                 slave_ethdev->data->dev_link.link_status;
2409
2410                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2411                                                 RTE_ETH_EVENT_INTR_LSC,
2412                                                 &bonded_ethdev->data->port_id,
2413                                                 NULL);
2414                         }
2415                 }
2416                 rte_spinlock_unlock(&internals->lock);
2417         }
2418
2419         if (polling_slave_found)
2420                 /* Set alarm to continue monitoring link status of slave ethdev's */
2421                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2422                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2423 }
2424
2425 static int
2426 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2427 {
2428         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2429
2430         struct bond_dev_private *bond_ctx;
2431         struct rte_eth_link slave_link;
2432
2433         uint32_t idx;
2434
2435         bond_ctx = ethdev->data->dev_private;
2436
2437         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2438
2439         if (ethdev->data->dev_started == 0 ||
2440                         bond_ctx->active_slave_count == 0) {
2441                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2442                 return 0;
2443         }
2444
2445         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2446
2447         if (wait_to_complete)
2448                 link_update = rte_eth_link_get;
2449         else
2450                 link_update = rte_eth_link_get_nowait;
2451
2452         switch (bond_ctx->mode) {
2453         case BONDING_MODE_BROADCAST:
2454                 /**
2455                  * Setting link speed to UINT32_MAX to ensure we pick up the
2456                  * value of the first active slave
2457                  */
2458                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2459
2460                 /**
2461                  * link speed is minimum value of all the slaves link speed as
2462                  * packet loss will occur on this slave if transmission at rates
2463                  * greater than this are attempted
2464                  */
2465                 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
2466                         link_update(bond_ctx->active_slaves[0], &slave_link);
2467
2468                         if (slave_link.link_speed <
2469                                         ethdev->data->dev_link.link_speed)
2470                                 ethdev->data->dev_link.link_speed =
2471                                                 slave_link.link_speed;
2472                 }
2473                 break;
2474         case BONDING_MODE_ACTIVE_BACKUP:
2475                 /* Current primary slave */
2476                 link_update(bond_ctx->current_primary_port, &slave_link);
2477
2478                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2479                 break;
2480         case BONDING_MODE_8023AD:
2481                 ethdev->data->dev_link.link_autoneg =
2482                                 bond_ctx->mode4.slave_link.link_autoneg;
2483                 ethdev->data->dev_link.link_duplex =
2484                                 bond_ctx->mode4.slave_link.link_duplex;
2485                 /* fall through to update link speed */
2486         case BONDING_MODE_ROUND_ROBIN:
2487         case BONDING_MODE_BALANCE:
2488         case BONDING_MODE_TLB:
2489         case BONDING_MODE_ALB:
2490         default:
2491                 /**
2492                  * In theses mode the maximum theoretical link speed is the sum
2493                  * of all the slaves
2494                  */
2495                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2496
2497                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2498                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2499
2500                         ethdev->data->dev_link.link_speed +=
2501                                         slave_link.link_speed;
2502                 }
2503         }
2504
2505
2506         return 0;
2507 }
2508
2509
2510 static int
2511 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2512 {
2513         struct bond_dev_private *internals = dev->data->dev_private;
2514         struct rte_eth_stats slave_stats;
2515         int i, j;
2516
2517         for (i = 0; i < internals->slave_count; i++) {
2518                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2519
2520                 stats->ipackets += slave_stats.ipackets;
2521                 stats->opackets += slave_stats.opackets;
2522                 stats->ibytes += slave_stats.ibytes;
2523                 stats->obytes += slave_stats.obytes;
2524                 stats->imissed += slave_stats.imissed;
2525                 stats->ierrors += slave_stats.ierrors;
2526                 stats->oerrors += slave_stats.oerrors;
2527                 stats->rx_nombuf += slave_stats.rx_nombuf;
2528
2529                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2530                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2531                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2532                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2533                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2534                         stats->q_errors[j] += slave_stats.q_errors[j];
2535                 }
2536
2537         }
2538
2539         return 0;
2540 }
2541
2542 static void
2543 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2544 {
2545         struct bond_dev_private *internals = dev->data->dev_private;
2546         int i;
2547
2548         for (i = 0; i < internals->slave_count; i++)
2549                 rte_eth_stats_reset(internals->slaves[i].port_id);
2550 }
2551
2552 static void
2553 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2554 {
2555         struct bond_dev_private *internals = eth_dev->data->dev_private;
2556         int i;
2557
2558         internals->promiscuous_en = 1;
2559
2560         switch (internals->mode) {
2561         /* Promiscuous mode is propagated to all slaves */
2562         case BONDING_MODE_ROUND_ROBIN:
2563         case BONDING_MODE_BALANCE:
2564         case BONDING_MODE_BROADCAST:
2565                 for (i = 0; i < internals->slave_count; i++)
2566                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2567                 break;
2568         /* In mode4 promiscus mode is managed when slave is added/removed */
2569         case BONDING_MODE_8023AD:
2570                 break;
2571         /* Promiscuous mode is propagated only to primary slave */
2572         case BONDING_MODE_ACTIVE_BACKUP:
2573         case BONDING_MODE_TLB:
2574         case BONDING_MODE_ALB:
2575         default:
2576                 rte_eth_promiscuous_enable(internals->current_primary_port);
2577         }
2578 }
2579
2580 static void
2581 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2582 {
2583         struct bond_dev_private *internals = dev->data->dev_private;
2584         int i;
2585
2586         internals->promiscuous_en = 0;
2587
2588         switch (internals->mode) {
2589         /* Promiscuous mode is propagated to all slaves */
2590         case BONDING_MODE_ROUND_ROBIN:
2591         case BONDING_MODE_BALANCE:
2592         case BONDING_MODE_BROADCAST:
2593                 for (i = 0; i < internals->slave_count; i++)
2594                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2595                 break;
2596         /* In mode4 promiscus mode is set managed when slave is added/removed */
2597         case BONDING_MODE_8023AD:
2598                 break;
2599         /* Promiscuous mode is propagated only to primary slave */
2600         case BONDING_MODE_ACTIVE_BACKUP:
2601         case BONDING_MODE_TLB:
2602         case BONDING_MODE_ALB:
2603         default:
2604                 rte_eth_promiscuous_disable(internals->current_primary_port);
2605         }
2606 }
2607
2608 static void
2609 bond_ethdev_delayed_lsc_propagation(void *arg)
2610 {
2611         if (arg == NULL)
2612                 return;
2613
2614         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2615                         RTE_ETH_EVENT_INTR_LSC, NULL);
2616 }
2617
2618 int
2619 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2620                 void *param, void *ret_param __rte_unused)
2621 {
2622         struct rte_eth_dev *bonded_eth_dev;
2623         struct bond_dev_private *internals;
2624         struct rte_eth_link link;
2625         int rc = -1;
2626
2627         int i, valid_slave = 0;
2628         uint8_t active_pos;
2629         uint8_t lsc_flag = 0;
2630
2631         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2632                 return rc;
2633
2634         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2635
2636         if (check_for_bonded_ethdev(bonded_eth_dev))
2637                 return rc;
2638
2639         internals = bonded_eth_dev->data->dev_private;
2640
2641         /* If the device isn't started don't handle interrupts */
2642         if (!bonded_eth_dev->data->dev_started)
2643                 return rc;
2644
2645         /* verify that port_id is a valid slave of bonded port */
2646         for (i = 0; i < internals->slave_count; i++) {
2647                 if (internals->slaves[i].port_id == port_id) {
2648                         valid_slave = 1;
2649                         break;
2650                 }
2651         }
2652
2653         if (!valid_slave)
2654                 return rc;
2655
2656         /* Synchronize lsc callback parallel calls either by real link event
2657          * from the slaves PMDs or by the bonding PMD itself.
2658          */
2659         rte_spinlock_lock(&internals->lsc_lock);
2660
2661         /* Search for port in active port list */
2662         active_pos = find_slave_by_id(internals->active_slaves,
2663                         internals->active_slave_count, port_id);
2664
2665         rte_eth_link_get_nowait(port_id, &link);
2666         if (link.link_status) {
2667                 if (active_pos < internals->active_slave_count)
2668                         goto link_update;
2669
2670                 /* if no active slave ports then set this port to be primary port */
2671                 if (internals->active_slave_count < 1) {
2672                         /* If first active slave, then change link status */
2673                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2674                         internals->current_primary_port = port_id;
2675                         lsc_flag = 1;
2676
2677                         mac_address_slaves_update(bonded_eth_dev);
2678                 }
2679
2680                 /* check link state properties if bonded link is up*/
2681                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2682                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2683                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2684                                              "for slave %d in bonding mode %d",
2685                                              port_id, internals->mode);
2686                 } else {
2687                         /* inherit slave link properties */
2688                         link_properties_set(bonded_eth_dev, &link);
2689                 }
2690
2691                 activate_slave(bonded_eth_dev, port_id);
2692
2693                 /* If user has defined the primary port then default to using it */
2694                 if (internals->user_defined_primary_port &&
2695                                 internals->primary_port == port_id)
2696                         bond_ethdev_primary_set(internals, port_id);
2697         } else {
2698                 if (active_pos == internals->active_slave_count)
2699                         goto link_update;
2700
2701                 /* Remove from active slave list */
2702                 deactivate_slave(bonded_eth_dev, port_id);
2703
2704                 if (internals->active_slave_count < 1)
2705                         lsc_flag = 1;
2706
2707                 /* Update primary id, take first active slave from list or if none
2708                  * available set to -1 */
2709                 if (port_id == internals->current_primary_port) {
2710                         if (internals->active_slave_count > 0)
2711                                 bond_ethdev_primary_set(internals,
2712                                                 internals->active_slaves[0]);
2713                         else
2714                                 internals->current_primary_port = internals->primary_port;
2715                 }
2716         }
2717
2718 link_update:
2719         /**
2720          * Update bonded device link properties after any change to active
2721          * slaves
2722          */
2723         bond_ethdev_link_update(bonded_eth_dev, 0);
2724
2725         if (lsc_flag) {
2726                 /* Cancel any possible outstanding interrupts if delays are enabled */
2727                 if (internals->link_up_delay_ms > 0 ||
2728                         internals->link_down_delay_ms > 0)
2729                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2730                                         bonded_eth_dev);
2731
2732                 if (bonded_eth_dev->data->dev_link.link_status) {
2733                         if (internals->link_up_delay_ms > 0)
2734                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2735                                                 bond_ethdev_delayed_lsc_propagation,
2736                                                 (void *)bonded_eth_dev);
2737                         else
2738                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2739                                                 RTE_ETH_EVENT_INTR_LSC,
2740                                                 NULL);
2741
2742                 } else {
2743                         if (internals->link_down_delay_ms > 0)
2744                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2745                                                 bond_ethdev_delayed_lsc_propagation,
2746                                                 (void *)bonded_eth_dev);
2747                         else
2748                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2749                                                 RTE_ETH_EVENT_INTR_LSC,
2750                                                 NULL);
2751                 }
2752         }
2753
2754         rte_spinlock_unlock(&internals->lsc_lock);
2755
2756         return rc;
2757 }
2758
2759 static int
2760 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2761                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2762 {
2763         unsigned i, j;
2764         int result = 0;
2765         int slave_reta_size;
2766         unsigned reta_count;
2767         struct bond_dev_private *internals = dev->data->dev_private;
2768
2769         if (reta_size != internals->reta_size)
2770                 return -EINVAL;
2771
2772          /* Copy RETA table */
2773         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2774
2775         for (i = 0; i < reta_count; i++) {
2776                 internals->reta_conf[i].mask = reta_conf[i].mask;
2777                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2778                         if ((reta_conf[i].mask >> j) & 0x01)
2779                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2780         }
2781
2782         /* Fill rest of array */
2783         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2784                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2785                                 sizeof(internals->reta_conf[0]) * reta_count);
2786
2787         /* Propagate RETA over slaves */
2788         for (i = 0; i < internals->slave_count; i++) {
2789                 slave_reta_size = internals->slaves[i].reta_size;
2790                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2791                                 &internals->reta_conf[0], slave_reta_size);
2792                 if (result < 0)
2793                         return result;
2794         }
2795
2796         return 0;
2797 }
2798
2799 static int
2800 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2801                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2802 {
2803         int i, j;
2804         struct bond_dev_private *internals = dev->data->dev_private;
2805
2806         if (reta_size != internals->reta_size)
2807                 return -EINVAL;
2808
2809          /* Copy RETA table */
2810         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2811                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2812                         if ((reta_conf[i].mask >> j) & 0x01)
2813                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2814
2815         return 0;
2816 }
2817
2818 static int
2819 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2820                 struct rte_eth_rss_conf *rss_conf)
2821 {
2822         int i, result = 0;
2823         struct bond_dev_private *internals = dev->data->dev_private;
2824         struct rte_eth_rss_conf bond_rss_conf;
2825
2826         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2827
2828         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2829
2830         if (bond_rss_conf.rss_hf != 0)
2831                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2832
2833         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2834                         sizeof(internals->rss_key)) {
2835                 if (bond_rss_conf.rss_key_len == 0)
2836                         bond_rss_conf.rss_key_len = 40;
2837                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2838                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2839                                 internals->rss_key_len);
2840         }
2841
2842         for (i = 0; i < internals->slave_count; i++) {
2843                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2844                                 &bond_rss_conf);
2845                 if (result < 0)
2846                         return result;
2847         }
2848
2849         return 0;
2850 }
2851
2852 static int
2853 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2854                 struct rte_eth_rss_conf *rss_conf)
2855 {
2856         struct bond_dev_private *internals = dev->data->dev_private;
2857
2858         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2859         rss_conf->rss_key_len = internals->rss_key_len;
2860         if (rss_conf->rss_key)
2861                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2862
2863         return 0;
2864 }
2865
2866 static int
2867 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2868 {
2869         struct rte_eth_dev *slave_eth_dev;
2870         struct bond_dev_private *internals = dev->data->dev_private;
2871         int ret, i;
2872
2873         rte_spinlock_lock(&internals->lock);
2874
2875         for (i = 0; i < internals->slave_count; i++) {
2876                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2877                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2878                         rte_spinlock_unlock(&internals->lock);
2879                         return -ENOTSUP;
2880                 }
2881         }
2882         for (i = 0; i < internals->slave_count; i++) {
2883                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2884                 if (ret < 0) {
2885                         rte_spinlock_unlock(&internals->lock);
2886                         return ret;
2887                 }
2888         }
2889
2890         rte_spinlock_unlock(&internals->lock);
2891         return 0;
2892 }
2893
2894 static int
2895 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2896 {
2897         if (mac_address_set(dev, addr)) {
2898                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2899                 return -EINVAL;
2900         }
2901
2902         return 0;
2903 }
2904
2905 static int
2906 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2907                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2908 {
2909         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2910                 *(const void **)arg = &bond_flow_ops;
2911                 return 0;
2912         }
2913         return -ENOTSUP;
2914 }
2915
2916 static int
2917 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2918                                 __rte_unused uint32_t index, uint32_t vmdq)
2919 {
2920         struct rte_eth_dev *slave_eth_dev;
2921         struct bond_dev_private *internals = dev->data->dev_private;
2922         int ret, i;
2923
2924         rte_spinlock_lock(&internals->lock);
2925
2926         for (i = 0; i < internals->slave_count; i++) {
2927                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2928                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2929                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2930                         ret = -ENOTSUP;
2931                         goto end;
2932                 }
2933         }
2934
2935         for (i = 0; i < internals->slave_count; i++) {
2936                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2937                                 mac_addr, vmdq);
2938                 if (ret < 0) {
2939                         /* rollback */
2940                         for (i--; i >= 0; i--)
2941                                 rte_eth_dev_mac_addr_remove(
2942                                         internals->slaves[i].port_id, mac_addr);
2943                         goto end;
2944                 }
2945         }
2946
2947         ret = 0;
2948 end:
2949         rte_spinlock_unlock(&internals->lock);
2950         return ret;
2951 }
2952
2953 static void
2954 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2955 {
2956         struct rte_eth_dev *slave_eth_dev;
2957         struct bond_dev_private *internals = dev->data->dev_private;
2958         int i;
2959
2960         rte_spinlock_lock(&internals->lock);
2961
2962         for (i = 0; i < internals->slave_count; i++) {
2963                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2964                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2965                         goto end;
2966         }
2967
2968         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2969
2970         for (i = 0; i < internals->slave_count; i++)
2971                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2972                                 mac_addr);
2973
2974 end:
2975         rte_spinlock_unlock(&internals->lock);
2976 }
2977
/*
 * Ethdev operations table for bonded devices: maps each generic ethdev
 * operation to the bonding implementation defined in this file.
 * It has external linkage (not static).
 */
const struct eth_dev_ops default_dev_ops = {
	/* device lifecycle and configuration */
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	/* queue management */
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	/* link state and statistics */
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	/* RSS redirection table and hash configuration */
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
	/* MTU, MAC addresses and flow filtering */
	.mtu_set              = bond_ethdev_mtu_set,
	.mac_addr_set         = bond_ethdev_mac_address_set,
	.mac_addr_add         = bond_ethdev_mac_addr_add,
	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
	.filter_ctrl          = bond_filter_ctrl
};
3004
3005 static int
3006 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3007 {
3008         const char *name = rte_vdev_device_name(dev);
3009         uint8_t socket_id = dev->device.numa_node;
3010         struct bond_dev_private *internals = NULL;
3011         struct rte_eth_dev *eth_dev = NULL;
3012         uint32_t vlan_filter_bmp_size;
3013
3014         /* now do all data allocation - for eth_dev structure, dummy pci driver
3015          * and internal (private) data
3016          */
3017
3018         /* reserve an ethdev entry */
3019         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3020         if (eth_dev == NULL) {
3021                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3022                 goto err;
3023         }
3024
3025         internals = eth_dev->data->dev_private;
3026         eth_dev->data->nb_rx_queues = (uint16_t)1;
3027         eth_dev->data->nb_tx_queues = (uint16_t)1;
3028
3029         /* Allocate memory for storing MAC addresses */
3030         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3031                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3032         if (eth_dev->data->mac_addrs == NULL) {
3033                 RTE_BOND_LOG(ERR,
3034                              "Failed to allocate %u bytes needed to store MAC addresses",
3035                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3036                 goto err;
3037         }
3038
3039         eth_dev->dev_ops = &default_dev_ops;
3040         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3041
3042         rte_spinlock_init(&internals->lock);
3043         rte_spinlock_init(&internals->lsc_lock);
3044
3045         internals->port_id = eth_dev->data->port_id;
3046         internals->mode = BONDING_MODE_INVALID;
3047         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3048         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3049         internals->burst_xmit_hash = burst_xmit_l2_hash;
3050         internals->user_defined_mac = 0;
3051
3052         internals->link_status_polling_enabled = 0;
3053
3054         internals->link_status_polling_interval_ms =
3055                 DEFAULT_POLLING_INTERVAL_10_MS;
3056         internals->link_down_delay_ms = 0;
3057         internals->link_up_delay_ms = 0;
3058
3059         internals->slave_count = 0;
3060         internals->active_slave_count = 0;
3061         internals->rx_offload_capa = 0;
3062         internals->tx_offload_capa = 0;
3063         internals->rx_queue_offload_capa = 0;
3064         internals->tx_queue_offload_capa = 0;
3065         internals->candidate_max_rx_pktlen = 0;
3066         internals->max_rx_pktlen = 0;
3067
3068         /* Initially allow to choose any offload type */
3069         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3070
3071         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3072         memset(internals->slaves, 0, sizeof(internals->slaves));
3073
3074         TAILQ_INIT(&internals->flow_list);
3075         internals->flow_isolated_valid = 0;
3076
3077         /* Set mode 4 default configuration */
3078         bond_mode_8023ad_setup(eth_dev, NULL);
3079         if (bond_ethdev_mode_set(eth_dev, mode)) {
3080                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3081                                  eth_dev->data->port_id, mode);
3082                 goto err;
3083         }
3084
3085         vlan_filter_bmp_size =
3086                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3087         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3088                                                    RTE_CACHE_LINE_SIZE);
3089         if (internals->vlan_filter_bmpmem == NULL) {
3090                 RTE_BOND_LOG(ERR,
3091                              "Failed to allocate vlan bitmap for bonded device %u",
3092                              eth_dev->data->port_id);
3093                 goto err;
3094         }
3095
3096         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3097                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3098         if (internals->vlan_filter_bmp == NULL) {
3099                 RTE_BOND_LOG(ERR,
3100                              "Failed to init vlan bitmap for bonded device %u",
3101                              eth_dev->data->port_id);
3102                 rte_free(internals->vlan_filter_bmpmem);
3103                 goto err;
3104         }
3105
3106         return eth_dev->data->port_id;
3107
3108 err:
3109         rte_free(internals);
3110         if (eth_dev != NULL) {
3111                 rte_free(eth_dev->data->mac_addrs);
3112                 rte_eth_dev_release_port(eth_dev);
3113         }
3114         return -1;
3115 }
3116
3117 static int
3118 bond_probe(struct rte_vdev_device *dev)
3119 {
3120         const char *name;
3121         struct bond_dev_private *internals;
3122         struct rte_kvargs *kvlist;
3123         uint8_t bonding_mode, socket_id/*, agg_mode*/;
3124         int  arg_count, port_id;
3125         uint8_t agg_mode;
3126         struct rte_eth_dev *eth_dev;
3127
3128         if (!dev)
3129                 return -EINVAL;
3130
3131         name = rte_vdev_device_name(dev);
3132         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3133
3134         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3135             strlen(rte_vdev_device_args(dev)) == 0) {
3136                 eth_dev = rte_eth_dev_attach_secondary(name);
3137                 if (!eth_dev) {
3138                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3139                         return -1;
3140                 }
3141                 /* TODO: request info from primary to set up Rx and Tx */
3142                 eth_dev->dev_ops = &default_dev_ops;
3143                 eth_dev->device = &dev->device;
3144                 rte_eth_dev_probing_finish(eth_dev);
3145                 return 0;
3146         }
3147
3148         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3149                 pmd_bond_init_valid_arguments);
3150         if (kvlist == NULL)
3151                 return -1;
3152
3153         /* Parse link bonding mode */
3154         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3155                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3156                                 &bond_ethdev_parse_slave_mode_kvarg,
3157                                 &bonding_mode) != 0) {
3158                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3159                                         name);
3160                         goto parse_error;
3161                 }
3162         } else {
3163                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3164                                 "device %s", name);
3165                 goto parse_error;
3166         }
3167
3168         /* Parse socket id to create bonding device on */
3169         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3170         if (arg_count == 1) {
3171                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3172                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3173                                 != 0) {
3174                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3175                                         "bonded device %s", name);
3176                         goto parse_error;
3177                 }
3178         } else if (arg_count > 1) {
3179                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3180                                 "bonded device %s", name);
3181                 goto parse_error;
3182         } else {
3183                 socket_id = rte_socket_id();
3184         }
3185
3186         dev->device.numa_node = socket_id;
3187
3188         /* Create link bonding eth device */
3189         port_id = bond_alloc(dev, bonding_mode);
3190         if (port_id < 0) {
3191                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3192                                 "socket %u.",   name, bonding_mode, socket_id);
3193                 goto parse_error;
3194         }
3195         internals = rte_eth_devices[port_id].data->dev_private;
3196         internals->kvlist = kvlist;
3197
3198         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3199
3200         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3201                 if (rte_kvargs_process(kvlist,
3202                                 PMD_BOND_AGG_MODE_KVARG,
3203                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3204                                 &agg_mode) != 0) {
3205                         RTE_BOND_LOG(ERR,
3206                                         "Failed to parse agg selection mode for bonded device %s",
3207                                         name);
3208                         goto parse_error;
3209                 }
3210
3211                 if (internals->mode == BONDING_MODE_8023AD)
3212                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3213                                         agg_mode);
3214         } else {
3215                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3216         }
3217
3218         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3219                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3220         return 0;
3221
3222 parse_error:
3223         rte_kvargs_free(kvlist);
3224
3225         return -1;
3226 }
3227
3228 static int
3229 bond_remove(struct rte_vdev_device *dev)
3230 {
3231         struct rte_eth_dev *eth_dev;
3232         struct bond_dev_private *internals;
3233         const char *name;
3234
3235         if (!dev)
3236                 return -EINVAL;
3237
3238         name = rte_vdev_device_name(dev);
3239         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3240
3241         /* now free all data allocation - for eth_dev structure,
3242          * dummy pci driver and internal (private) data
3243          */
3244
3245         /* find an ethdev entry */
3246         eth_dev = rte_eth_dev_allocated(name);
3247         if (eth_dev == NULL)
3248                 return -ENODEV;
3249
3250         RTE_ASSERT(eth_dev->device == &dev->device);
3251
3252         internals = eth_dev->data->dev_private;
3253         if (internals->slave_count != 0)
3254                 return -EBUSY;
3255
3256         if (eth_dev->data->dev_started == 1) {
3257                 bond_ethdev_stop(eth_dev);
3258                 bond_ethdev_close(eth_dev);
3259         }
3260
3261         eth_dev->dev_ops = NULL;
3262         eth_dev->rx_pkt_burst = NULL;
3263         eth_dev->tx_pkt_burst = NULL;
3264
3265         internals = eth_dev->data->dev_private;
3266         /* Try to release mempool used in mode6. If the bond
3267          * device is not mode6, free the NULL is not problem.
3268          */
3269         rte_mempool_free(internals->mode6.mempool);
3270         rte_bitmap_free(internals->vlan_filter_bmp);
3271         rte_free(internals->vlan_filter_bmpmem);
3272         rte_free(eth_dev->data->dev_private);
3273         rte_free(eth_dev->data->mac_addrs);
3274
3275         rte_eth_dev_release_port(eth_dev);
3276
3277         return 0;
3278 }
3279
3280 /* this part will resolve the slave portids after all the other pdev and vdev
3281  * have been allocated */
3282 static int
3283 bond_ethdev_configure(struct rte_eth_dev *dev)
3284 {
3285         const char *name = dev->device->name;
3286         struct bond_dev_private *internals = dev->data->dev_private;
3287         struct rte_kvargs *kvlist = internals->kvlist;
3288         int arg_count;
3289         uint16_t port_id = dev - rte_eth_devices;
3290         uint8_t agg_mode;
3291
3292         static const uint8_t default_rss_key[40] = {
3293                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3294                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3295                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3296                 0xBE, 0xAC, 0x01, 0xFA
3297         };
3298
3299         unsigned i, j;
3300
3301         /*
3302          * If RSS is enabled, fill table with default values and
3303          * set key to the the value specified in port RSS configuration.
3304          * Fall back to default RSS key if the key is not specified
3305          */
3306         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3307                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3308                         internals->rss_key_len =
3309                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3310                         memcpy(internals->rss_key,
3311                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3312                                internals->rss_key_len);
3313                 } else {
3314                         internals->rss_key_len = sizeof(default_rss_key);
3315                         memcpy(internals->rss_key, default_rss_key,
3316                                internals->rss_key_len);
3317                 }
3318
3319                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3320                         internals->reta_conf[i].mask = ~0LL;
3321                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3322                                 internals->reta_conf[i].reta[j] =
3323                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3324                                                 dev->data->nb_rx_queues;
3325                 }
3326         }
3327
3328         /* set the max_rx_pktlen */
3329         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3330
3331         /*
3332          * if no kvlist, it means that this bonded device has been created
3333          * through the bonding api.
3334          */
3335         if (!kvlist)
3336                 return 0;
3337
3338         /* Parse MAC address for bonded device */
3339         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3340         if (arg_count == 1) {
3341                 struct ether_addr bond_mac;
3342
3343                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3344                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3345                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3346                                      name);
3347                         return -1;
3348                 }
3349
3350                 /* Set MAC address */
3351                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3352                         RTE_BOND_LOG(ERR,
3353                                      "Failed to set mac address on bonded device %s",
3354                                      name);
3355                         return -1;
3356                 }
3357         } else if (arg_count > 1) {
3358                 RTE_BOND_LOG(ERR,
3359                              "MAC address can be specified only once for bonded device %s",
3360                              name);
3361                 return -1;
3362         }
3363
3364         /* Parse/set balance mode transmit policy */
3365         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3366         if (arg_count == 1) {
3367                 uint8_t xmit_policy;
3368
3369                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3370                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3371                     0) {
3372                         RTE_BOND_LOG(INFO,
3373                                      "Invalid xmit policy specified for bonded device %s",
3374                                      name);
3375                         return -1;
3376                 }
3377
3378                 /* Set balance mode transmit policy*/
3379                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3380                         RTE_BOND_LOG(ERR,
3381                                      "Failed to set balance xmit policy on bonded device %s",
3382                                      name);
3383                         return -1;
3384                 }
3385         } else if (arg_count > 1) {
3386                 RTE_BOND_LOG(ERR,
3387                              "Transmit policy can be specified only once for bonded device %s",
3388                              name);
3389                 return -1;
3390         }
3391
3392         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3393                 if (rte_kvargs_process(kvlist,
3394                                        PMD_BOND_AGG_MODE_KVARG,
3395                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3396                                        &agg_mode) != 0) {
3397                         RTE_BOND_LOG(ERR,
3398                                      "Failed to parse agg selection mode for bonded device %s",
3399                                      name);
3400                 }
3401                 if (internals->mode == BONDING_MODE_8023AD)
3402                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3403                                                               agg_mode);
3404         }
3405
3406         /* Parse/add slave ports to bonded device */
3407         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3408                 struct bond_ethdev_slave_ports slave_ports;
3409                 unsigned i;
3410
3411                 memset(&slave_ports, 0, sizeof(slave_ports));
3412
3413                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3414                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3415                         RTE_BOND_LOG(ERR,
3416                                      "Failed to parse slave ports for bonded device %s",
3417                                      name);
3418                         return -1;
3419                 }
3420
3421                 for (i = 0; i < slave_ports.slave_count; i++) {
3422                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3423                                 RTE_BOND_LOG(ERR,
3424                                              "Failed to add port %d as slave to bonded device %s",
3425                                              slave_ports.slaves[i], name);
3426                         }
3427                 }
3428
3429         } else {
3430                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3431                 return -1;
3432         }
3433
3434         /* Parse/set primary slave port id*/
3435         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3436         if (arg_count == 1) {
3437                 uint16_t primary_slave_port_id;
3438
3439                 if (rte_kvargs_process(kvlist,
3440                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3441                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3442                                        &primary_slave_port_id) < 0) {
3443                         RTE_BOND_LOG(INFO,
3444                                      "Invalid primary slave port id specified for bonded device %s",
3445                                      name);
3446                         return -1;
3447                 }
3448
3449                 /* Set balance mode transmit policy*/
3450                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3451                     != 0) {
3452                         RTE_BOND_LOG(ERR,
3453                                      "Failed to set primary slave port %d on bonded device %s",
3454                                      primary_slave_port_id, name);
3455                         return -1;
3456                 }
3457         } else if (arg_count > 1) {
3458                 RTE_BOND_LOG(INFO,
3459                              "Primary slave can be specified only once for bonded device %s",
3460                              name);
3461                 return -1;
3462         }
3463
3464         /* Parse link status monitor polling interval */
3465         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3466         if (arg_count == 1) {
3467                 uint32_t lsc_poll_interval_ms;
3468
3469                 if (rte_kvargs_process(kvlist,
3470                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3471                                        &bond_ethdev_parse_time_ms_kvarg,
3472                                        &lsc_poll_interval_ms) < 0) {
3473                         RTE_BOND_LOG(INFO,
3474                                      "Invalid lsc polling interval value specified for bonded"
3475                                      " device %s", name);
3476                         return -1;
3477                 }
3478
3479                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3480                     != 0) {
3481                         RTE_BOND_LOG(ERR,
3482                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3483                                      lsc_poll_interval_ms, name);
3484                         return -1;
3485                 }
3486         } else if (arg_count > 1) {
3487                 RTE_BOND_LOG(INFO,
3488                              "LSC polling interval can be specified only once for bonded"
3489                              " device %s", name);
3490                 return -1;
3491         }
3492
3493         /* Parse link up interrupt propagation delay */
3494         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3495         if (arg_count == 1) {
3496                 uint32_t link_up_delay_ms;
3497
3498                 if (rte_kvargs_process(kvlist,
3499                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3500                                        &bond_ethdev_parse_time_ms_kvarg,
3501                                        &link_up_delay_ms) < 0) {
3502                         RTE_BOND_LOG(INFO,
3503                                      "Invalid link up propagation delay value specified for"
3504                                      " bonded device %s", name);
3505                         return -1;
3506                 }
3507
3508                 /* Set balance mode transmit policy*/
3509                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3510                     != 0) {
3511                         RTE_BOND_LOG(ERR,
3512                                      "Failed to set link up propagation delay (%u ms) on bonded"
3513                                      " device %s", link_up_delay_ms, name);
3514                         return -1;
3515                 }
3516         } else if (arg_count > 1) {
3517                 RTE_BOND_LOG(INFO,
3518                              "Link up propagation delay can be specified only once for"
3519                              " bonded device %s", name);
3520                 return -1;
3521         }
3522
3523         /* Parse link down interrupt propagation delay */
3524         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3525         if (arg_count == 1) {
3526                 uint32_t link_down_delay_ms;
3527
3528                 if (rte_kvargs_process(kvlist,
3529                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3530                                        &bond_ethdev_parse_time_ms_kvarg,
3531                                        &link_down_delay_ms) < 0) {
3532                         RTE_BOND_LOG(INFO,
3533                                      "Invalid link down propagation delay value specified for"
3534                                      " bonded device %s", name);
3535                         return -1;
3536                 }
3537
3538                 /* Set balance mode transmit policy*/
3539                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3540                     != 0) {
3541                         RTE_BOND_LOG(ERR,
3542                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3543                                      link_down_delay_ms, name);
3544                         return -1;
3545                 }
3546         } else if (arg_count > 1) {
3547                 RTE_BOND_LOG(INFO,
3548                              "Link down propagation delay can be specified only once for  bonded device %s",
3549                              name);
3550                 return -1;
3551         }
3552
3553         return 0;
3554 }
3555
3556 struct rte_vdev_driver pmd_bond_drv = {
3557         .probe = bond_probe,
3558         .remove = bond_remove,
3559 };
3560
3561 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3562 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3563
3564 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3565         "slave=<ifc> "
3566         "primary=<ifc> "
3567         "mode=[0-6] "
3568         "xmit_policy=[l2 | l23 | l34] "
3569         "agg_mode=[count | stable | bandwidth] "
3570         "socket_id=<int> "
3571         "mac=<mac addr> "
3572         "lsc_poll_period_ms=<int> "
3573         "up_delay=<int> "
3574         "down_delay=<int>");
3575
3576 int bond_logtype;
3577
3578 RTE_INIT(bond_init_log)
3579 {
3580         bond_logtype = rte_log_register("pmd.net.bon");
3581         if (bond_logtype >= 0)
3582                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3583 }