bond: change warning
[dpdk.git] / lib / librte_pmd_bond / rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 #include <rte_ip.h>
42 #include <rte_devargs.h>
43 #include <rte_kvargs.h>
44 #include <rte_dev.h>
45 #include <rte_alarm.h>
46 #include <rte_cycles.h>
47
48 #include "rte_eth_bond.h"
49 #include "rte_eth_bond_private.h"
50 #include "rte_eth_bond_8023ad_private.h"
51
52 #define REORDER_PERIOD_MS 10
53
54 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
55
56 /* Table for statistics in mode 5 TLB */
57 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
58
59 static uint16_t
60 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
61 {
62         struct bond_dev_private *internals;
63
64         uint16_t num_rx_slave = 0;
65         uint16_t num_rx_total = 0;
66
67         int i;
68
69         /* Cast to structure, containing bonded device's port id and queue id */
70         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
71
72         internals = bd_rx_q->dev_private;
73
74
75         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
76                 /* Offset of pointer to *bufs increases as packets are received
77                  * from other slaves */
78                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
79                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
80                 if (num_rx_slave) {
81                         num_rx_total += num_rx_slave;
82                         nb_pkts -= num_rx_slave;
83                 }
84         }
85
86         return num_rx_total;
87 }
88
89 static uint16_t
90 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
91                 uint16_t nb_pkts)
92 {
93         struct bond_dev_private *internals;
94
95         /* Cast to structure, containing bonded device's port id and queue id */
96         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
97
98         internals = bd_rx_q->dev_private;
99
100         return rte_eth_rx_burst(internals->current_primary_port,
101                         bd_rx_q->queue_id, bufs, nb_pkts);
102 }
103
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* RX burst for 802.3ad (LACP, mode 4) bonding: receive from every
	 * active slave, hand slow-protocol (LACP) frames to the mode-4
	 * state machine, and drop data frames when the slave is not in
	 * COLLECTING state or (bonded device not promiscuous) the frame is
	 * not addressed to the bonded MAC.  Survivors are compacted in
	 * place in bufs[]; returns the number of packets left. */
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;      /* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		/* Prefetch the first packets of this slave's burst. */
		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			/* Remove packet from array if it is a slow packet, or the
			 * slave is not in collecting state, or the bonding
			 * interface is not in promiscuous mode and the packet's
			 * destination address does not match the bonded MAC. */
			if (unlikely(hdr->ether_type == ether_type_slow_be ||
				!collecting || (!promisc &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					/* LACP frame: mbuf ownership passes to
					 * the mode-4 state machine. */
					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
						bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
	}

	return num_rx_total;
}
174
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* TX burst for round-robin (mode 0) bonding: packets are spread one
	 * by one across the active slaves.  Packets a slave fails to accept
	 * are moved to the tail of bufs[] so the caller can retry or free
	 * them; returns the number actually transmitted. */
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	/* NOTE(review): this function-static rotation index is shared by
	 * every TX queue and every lcore using this driver; concurrent
	 * bursts race on it and can skew the distribution — confirm whether
	 * per-queue state is required here. */
	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it  */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
237
238 static uint16_t
239 bond_ethdev_tx_burst_active_backup(void *queue,
240                 struct rte_mbuf **bufs, uint16_t nb_pkts)
241 {
242         struct bond_dev_private *internals;
243         struct bond_tx_queue *bd_tx_q;
244
245         bd_tx_q = (struct bond_tx_queue *)queue;
246         internals = bd_tx_q->dev_private;
247
248         if (internals->active_slave_count < 1)
249                 return 0;
250
251         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
252                         bufs, nb_pkts);
253 }
254
255 static inline uint16_t
256 ether_hash(struct ether_hdr *eth_hdr)
257 {
258         uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes;
259         uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes;
260
261         return (word_src_addr[0] ^ word_dst_addr[0]) ^
262                         (word_src_addr[1] ^ word_dst_addr[1]) ^
263                         (word_src_addr[2] ^ word_dst_addr[2]);
264 }
265
266 static inline uint32_t
267 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
268 {
269         return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
270 }
271
272 static inline uint32_t
273 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
274 {
275         uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]);
276         uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
277
278         return (word_src_addr[0] ^ word_dst_addr[0]) ^
279                         (word_src_addr[1] ^ word_dst_addr[1]) ^
280                         (word_src_addr[2] ^ word_dst_addr[2]) ^
281                         (word_src_addr[3] ^ word_dst_addr[3]);
282 }
283
284 static inline size_t
285 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
286 {
287         size_t vlan_offset = 0;
288
289         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
290                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
291                 vlan_offset = sizeof(struct vlan_hdr);
292                 *proto = vlan_hdr->eth_proto;
293
294                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
295                         vlan_hdr = vlan_hdr + 1;
296
297                         *proto = vlan_hdr->eth_proto;
298                         vlan_offset += sizeof(struct vlan_hdr);
299                 }
300         }
301         return vlan_offset;
302 }
303
304 uint16_t
305 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
306 {
307         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
308
309         uint32_t hash = ether_hash(eth_hdr);
310
311         return (hash ^= hash >> 8) % slave_count;
312 }
313
314 uint16_t
315 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
316 {
317         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
318         uint16_t proto = eth_hdr->ether_type;
319         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
320         uint32_t hash, l3hash = 0;
321
322         hash = ether_hash(eth_hdr);
323
324         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
325                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
326                                 ((char *)(eth_hdr + 1) + vlan_offset);
327                 l3hash = ipv4_hash(ipv4_hdr);
328
329         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
330                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
331                                 ((char *)(eth_hdr + 1) + vlan_offset);
332                 l3hash = ipv6_hash(ipv6_hdr);
333         }
334
335         hash = hash ^ l3hash;
336         hash ^= hash >> 16;
337         hash ^= hash >> 8;
338
339         return hash % slave_count;
340 }
341
342 uint16_t
343 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
344 {
345         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
346         uint16_t proto = eth_hdr->ether_type;
347         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
348
349         struct udp_hdr *udp_hdr = NULL;
350         struct tcp_hdr *tcp_hdr = NULL;
351         uint32_t hash, l3hash = 0, l4hash = 0;
352
353         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
354                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
355                                 ((char *)(eth_hdr + 1) + vlan_offset);
356                 size_t ip_hdr_offset;
357
358                 l3hash = ipv4_hash(ipv4_hdr);
359
360                 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
361                                 IPV4_IHL_MULTIPLIER;
362
363                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
364                         tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
365                                         ip_hdr_offset);
366                         l4hash = HASH_L4_PORTS(tcp_hdr);
367                 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
368                         udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
369                                         ip_hdr_offset);
370                         l4hash = HASH_L4_PORTS(udp_hdr);
371                 }
372         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
373                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
374                                 ((char *)(eth_hdr + 1) + vlan_offset);
375                 l3hash = ipv6_hash(ipv6_hdr);
376
377                 if (ipv6_hdr->proto == IPPROTO_TCP) {
378                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
379                         l4hash = HASH_L4_PORTS(tcp_hdr);
380                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
381                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
382                         l4hash = HASH_L4_PORTS(udp_hdr);
383                 }
384         }
385
386         hash = l3hash ^ l4hash;
387         hash ^= hash >> 16;
388         hash ^= hash >> 8;
389
390         return hash % slave_count;
391 }
392
/* Per-slave bandwidth accounting used by the TLB (mode 5) slave
 * re-ordering logic: the remaining-bandwidth estimate is kept as a
 * quotient and a remainder so ties on the integer part can still be
 * ordered (see bandwidth_cmp / bandwidth_left). */
struct bwg_slave {
	uint64_t bwg_left_int;       /* integer part of bandwidth left */
	uint64_t bwg_left_remainder; /* remainder of the same division */
	uint8_t slave;               /* slave port id */
};
398
399 static int
400 bandwidth_cmp(const void *a, const void *b)
401 {
402         const struct bwg_slave *bwg_a = a;
403         const struct bwg_slave *bwg_b = b;
404         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
405         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
406                         (int64_t)bwg_a->bwg_left_remainder;
407         if (diff > 0)
408                 return 1;
409         else if (diff < 0)
410                 return -1;
411         else if (diff2 > 0)
412                 return 1;
413         else if (diff2 < 0)
414                 return -1;
415         else
416                 return 0;
417 }
418
/* Estimate how much of a slave's link bandwidth remains, given the bytes
 * transmitted since the last stats snapshot (load), and store the
 * quotient/remainder in bwg_slave for sorting by bandwidth_cmp().
 * link_speed is in Mbps; it is converted to bytes and scaled by the
 * number of elapsed REORDER_PERIOD_MS update periods.  If the link
 * reports zero speed, bwg_slave is left untouched. */
static void
bandwidth_left(int port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = (link_bwg * (update_idx+1) * REORDER_PERIOD_MS);
	/* NOTE(review): if 1000*load ever exceeds link_bwg this unsigned
	 * subtraction wraps around — confirm load cannot exceed the scaled
	 * link capacity. */
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
433
/* Periodic alarm callback for TLB (mode 5): estimates each active
 * slave's remaining bandwidth from its TX byte counters and re-orders
 * internals->active_slaves so the least-loaded slave comes first.
 * Re-arms itself every REORDER_PERIOD_MS milliseconds. */
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;


	/* Re-baseline the byte counters once every REORDER_PERIOD_MS ticks. */
	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		/* Bytes sent since the last snapshot; tlb_last_obytets is a
		 * file-scope table indexed by port id. */
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats)
			tlb_last_obytets[slave_id] = slave_stats.obytes;
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	/* Most bandwidth left sorts first (see bandwidth_cmp). */
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->active_slaves[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
475
/* TX burst for TLB (mode 5): tries slaves in the bandwidth-sorted order
 * maintained by bond_ethdev_update_tlb_slave_cb(), rewriting the source
 * MAC of frames that carry the primary slave's MAC to the transmitting
 * slave's persisted MAC before each attempt. */
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Snapshot the slave list so a concurrent re-order by the alarm
	 * callback cannot change it mid-burst. */
	memcpy(slaves, internals->active_slaves,
				sizeof(internals->active_slaves[0]) * num_of_slaves);


	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		/* NOTE(review): slaves[i] is a port id taken from
		 * active_slaves, yet it is used here to index
		 * internals->slaves[] — confirm that table is indexed by
		 * port id rather than by slave position. */
		ether_addr_copy(&internals->slaves[slaves[i]].persisted_mac_addr,
				&active_slave_addr);

		/* Rewrite the source MAC of the packets not yet sent. */
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
530
/* TX burst for balance (mode 2) bonding: each packet is assigned to a
 * slave by the configured xmit_hash policy (l2 / l23 / l34), then one
 * burst is sent per slave.  Packets a slave fails to accept are moved
 * to the tail of bufs[]; returns the number actually transmitted. */
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	/* Per-slave staging arrays (VLA sized by the burst). */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with the packets which are to be sent on it  */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
591
/* TX burst for 802.3ad (mode 4) bonding.  For each active slave, slow
 * (LACP) packets queued by the mode-4 state machine are dequeued and
 * placed at the front of that slave's buffer; data packets are then
 * distributed by the xmit_hash policy across slaves currently in
 * DISTRIBUTING state.  On partial TX, unsent slow packets are freed
 * (they belong to the driver) while unsent data packets are moved to
 * the tail of bufs[].  Returns the number of data packets sent. */
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	 /* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case 8023AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		/* Pull pending LACP frames for this slave; they go first in
		 * its buffer so they are transmitted before data. */
		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		/* Slow packets do not count toward the caller's total. */
		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
683
684 #ifdef RTE_MBUF_REFCNT
/* TX burst for broadcast (mode 3) bonding: every packet is transmitted
 * on every active slave.  Each mbuf's refcount is bumped by
 * (num_of_slaves - 1) up front; if any slave accepts fewer packets
 * than offered, the untransmitted references of every slave except the
 * most successful one are freed, since the caller only tracks a single
 * reference per packet.  Returns the best per-slave TX count. */
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
744 #endif
745
746 void
747 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
748                 struct rte_eth_link *slave_dev_link)
749 {
750         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
751         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
752
753         if (slave_dev_link->link_status &&
754                 bonded_eth_dev->data->dev_started) {
755                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
756                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
757
758                 internals->link_props_set = 1;
759         }
760 }
761
762 void
763 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
764 {
765         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
766
767         memset(&(bonded_eth_dev->data->dev_link), 0,
768                         sizeof(bonded_eth_dev->data->dev_link));
769
770         internals->link_props_set = 0;
771 }
772
773 int
774 link_properties_valid(struct rte_eth_link *bonded_dev_link,
775                 struct rte_eth_link *slave_dev_link)
776 {
777         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
778                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
779                 return -1;
780
781         return 0;
782 }
783
784 int
785 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
786 {
787         struct ether_addr *mac_addr;
788
789         if (eth_dev == NULL) {
790                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
791                 return -1;
792         }
793
794         if (dst_mac_addr == NULL) {
795                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
796                 return -1;
797         }
798
799         mac_addr = eth_dev->data->mac_addrs;
800
801         ether_addr_copy(mac_addr, dst_mac_addr);
802         return 0;
803 }
804
805 int
806 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
807 {
808         struct ether_addr *mac_addr;
809
810         if (eth_dev == NULL) {
811                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
812                 return -1;
813         }
814
815         if (new_mac_addr == NULL) {
816                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
817                 return -1;
818         }
819
820         mac_addr = eth_dev->data->mac_addrs;
821
822         /* If new MAC is different to current MAC then update */
823         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
824                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
825
826         return 0;
827 }
828
829 int
830 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
831 {
832         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
833         int i;
834
835         /* Update slave devices MAC addresses */
836         if (internals->slave_count < 1)
837                 return -1;
838
839         switch (internals->mode) {
840         case BONDING_MODE_ROUND_ROBIN:
841         case BONDING_MODE_BALANCE:
842 #ifdef RTE_MBUF_REFCNT
843         case BONDING_MODE_BROADCAST:
844 #endif
845                 for (i = 0; i < internals->slave_count; i++) {
846                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
847                                         bonded_eth_dev->data->mac_addrs)) {
848                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
849                                                 internals->slaves[i].port_id);
850                                 return -1;
851                         }
852                 }
853                 break;
854         case BONDING_MODE_8023AD:
855                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
856                 break;
857         case BONDING_MODE_ACTIVE_BACKUP:
858         case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
859         default:
860                 for (i = 0; i < internals->slave_count; i++) {
861                         if (internals->slaves[i].port_id ==
862                                         internals->current_primary_port) {
863                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
864                                                 bonded_eth_dev->data->mac_addrs)) {
865                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
866                                                         internals->current_primary_port);
867                                         return -1;
868                                 }
869                         } else {
870                                 if (mac_address_set(
871                                                 &rte_eth_devices[internals->slaves[i].port_id],
872                                                 &internals->slaves[i].persisted_mac_addr)) {
873                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
874                                                         internals->slaves[i].port_id);
875                                         return -1;
876                                 }
877                         }
878                 }
879         }
880
881         return 0;
882 }
883
/* Set the bonding mode and install the matching RX/TX burst handlers on the
 * bonded ethdev.
 *
 * @param eth_dev  bonded device to update
 * @param mode     one of the BONDING_MODE_* values
 *
 * @return 0 on success, -1 on an unknown mode or if enabling mode 4 fails
 */
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
#ifdef RTE_MBUF_REFCNT
	/* Broadcast mode duplicates mbufs, so it is only available when
	 * reference counting is compiled in */
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
#endif
	case BONDING_MODE_8023AD:
		/* Mode 4 needs its state machines enabled before the burst
		 * handlers may be installed */
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
		RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst and RX burst "
				"at least every 100ms.\n");
		break;
	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
932
/* (Re)configure a slave device to match the bonded device's configuration.
 *
 * The slave is stopped, configured with the bonded device's queue counts,
 * has every RX/TX queue set up from the bonded device's stored queue
 * configuration, and is then restarted.
 *
 * @param bonded_eth_dev  bonded device providing the configuration
 * @param slave_eth_dev   slave device to configure
 *
 * @return 0 on success; on failure the rte_eth_* error code (or -1 if the
 *         final start fails)
 */
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;

	int errval, q_id;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* Configure device with the bonded device's queue counts but the
	 * slave's own dev_conf */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			bonded_eth_dev->data->nb_rx_queues,
			bonded_eth_dev->data->nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues, mirroring the bonded device's stored queue config */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues, mirroring the bonded device's stored queue config */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	return 0;
}
1002
1003 void
1004 slave_remove(struct bond_dev_private *internals,
1005                 struct rte_eth_dev *slave_eth_dev)
1006 {
1007         uint8_t i;
1008
1009         for (i = 0; i < internals->slave_count; i++)
1010                 if (internals->slaves[i].port_id ==
1011                                 slave_eth_dev->data->port_id)
1012                         break;
1013
1014         if (i < (internals->slave_count - 1))
1015                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1016                                 sizeof(internals->slaves[0]) *
1017                                 (internals->slave_count - i - 1));
1018
1019         internals->slave_count--;
1020 }
1021
1022 static void
1023 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1024
/* Append a slave to the bonded device's slave table and initialise its
 * bookkeeping.
 *
 * If the slave's driver does not support link-status interrupts, polling is
 * enabled for it; the first such slave also arms the periodic link-status
 * polling alarm for the bonded device.
 *
 * NOTE: the caller is responsible for ensuring there is room in the table;
 * internals->slave_count is advanced implicitly by writing at that index
 * (the count itself is updated by the caller - not visible here, confirm).
 */
void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* If slave device doesn't support interrupts then we need to enabled
	 * polling to monitor link status */
	if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;

		/* Arm the polling alarm once, when the first polled slave is
		 * added; the callback re-arms itself thereafter */
		if (!internals->link_status_polling_enabled) {
			internals->link_status_polling_enabled = 1;

			rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
					bond_ethdev_slave_link_status_change_monitor,
					(void *)&rte_eth_devices[internals->port_id]);
		}
	}

	slave_details->link_status_wait_to_complete = 0;
	/* Snapshot the slave's current MAC so it can be restored when the
	 * slave is removed or demoted from primary */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
1054
1055 void
1056 bond_ethdev_primary_set(struct bond_dev_private *internals,
1057                 uint8_t slave_port_id)
1058 {
1059         int i;
1060
1061         if (internals->active_slave_count < 1)
1062                 internals->current_primary_port = slave_port_id;
1063         else
1064                 /* Search bonded device slave ports for new proposed primary port */
1065                 for (i = 0; i < internals->active_slave_count; i++) {
1066                         if (internals->active_slaves[i] == slave_port_id)
1067                                 internals->current_primary_port = slave_port_id;
1068                 }
1069 }
1070
1071 static void
1072 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1073
/* Start the bonded device: resolve its MAC address, propagate MACs to the
 * slaves, re-apply promiscuous mode, reconfigure and start every slave, and
 * kick off mode-specific machinery (mode 4 state machines, TLB callback).
 *
 * Returns 0 on success, -1 on any failure.
 */
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (valid_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	/* Link starts down; it comes up when a slave reports link up */
	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	/* No user-defined MAC: adopt the primary slave's persisted MAC */
	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs*/
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configure in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		if (slave_configure(eth_dev,
				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
			RTE_BOND_LOG(ERR,
					"bonded port (%d) failed to reconfigure slave device (%d)",
					eth_dev->data->port_id, internals->slaves[i].port_id);
			return -1;
		}
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING)
		bond_ethdev_update_tlb_slave_cb(internals);

	return 0;
}
1144
/* Stop the bonded device.
 *
 * For mode 4 the state machines are stopped first and any packets still
 * queued to/from them are drained and freed.  For TLB mode the periodic
 * slave-update alarm is cancelled.  Finally the active slave list, polling
 * flag, link status and started flag are cleared.
 */
static void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->slave_count; i++) {
			port = &mode_8023ad_ports[internals->slaves[i].port_id];

			RTE_VERIFY(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_VERIFY(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
		rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 0;
}
1181
/* Close is intentionally a no-op for the bonded device; there are no
 * per-device resources to release here. */
static void
bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
{
}
1186
1187 /* forward declaration */
1188 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1189
1190 static void
1191 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1192 {
1193         struct bond_dev_private *internals = dev->data->dev_private;
1194
1195         dev_info->driver_name = driver_name;
1196         dev_info->max_mac_addrs = 1;
1197
1198         dev_info->max_rx_pktlen = (uint32_t)2048;
1199
1200         dev_info->max_rx_queues = (uint16_t)128;
1201         dev_info->max_tx_queues = (uint16_t)512;
1202
1203         dev_info->min_rx_bufsize = 0;
1204         dev_info->pci_dev = dev->pci_dev;
1205
1206         dev_info->rx_offload_capa = internals->rx_offload_capa;
1207         dev_info->tx_offload_capa = internals->tx_offload_capa;
1208 }
1209
1210 static int
1211 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1212                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1213                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1214 {
1215         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1216                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1217                                         0, dev->pci_dev->numa_node);
1218         if (bd_rx_q == NULL)
1219                 return -1;
1220
1221         bd_rx_q->queue_id = rx_queue_id;
1222         bd_rx_q->dev_private = dev->data->dev_private;
1223
1224         bd_rx_q->nb_rx_desc = nb_rx_desc;
1225
1226         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1227         bd_rx_q->mb_pool = mb_pool;
1228
1229         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1230
1231         return 0;
1232 }
1233
1234 static int
1235 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1236                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1237                 const struct rte_eth_txconf *tx_conf)
1238 {
1239         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1240                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1241                                         0, dev->pci_dev->numa_node);
1242
1243         if (bd_tx_q == NULL)
1244                 return -1;
1245
1246         bd_tx_q->queue_id = tx_queue_id;
1247         bd_tx_q->dev_private = dev->data->dev_private;
1248
1249         bd_tx_q->nb_tx_desc = nb_tx_desc;
1250         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1251
1252         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1253
1254         return 0;
1255 }
1256
/* Release the bonded device's RX queue state.
 *
 * Simplified: rte_free() is documented as a no-op for NULL, so the explicit
 * guard was redundant. */
static void
bond_ethdev_rx_queue_release(void *queue)
{
	rte_free(queue);
}
1265
/* Release the bonded device's TX queue state.
 *
 * Simplified: rte_free() is documented as a no-op for NULL, so the explicit
 * guard was redundant. */
static void
bond_ethdev_tx_queue_release(void *queue)
{
	rte_free(queue);
}
1274
/* Periodic alarm callback that polls link status of slaves whose drivers do
 * not support link-status interrupts.
 *
 * For each polled slave the link is refreshed via the slave's link_update
 * op; if the status changed since the last poll, the bonded device's LSC
 * callback is invoked as if an interrupt had fired.  The callback re-arms
 * itself while at least one polled slave exists (or when the lock could not
 * be taken, to avoid losing the polling thread).
 */
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	/* Stop re-arming once the device is stopped or polling disabled */
	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdev's */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
1332
1333 static int
1334 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1335                 int wait_to_complete)
1336 {
1337         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1338
1339         if (!bonded_eth_dev->data->dev_started ||
1340                 internals->active_slave_count == 0) {
1341                 bonded_eth_dev->data->dev_link.link_status = 0;
1342                 return 0;
1343         } else {
1344                 struct rte_eth_dev *slave_eth_dev;
1345                 int i, link_up = 0;
1346
1347                 for (i = 0; i < internals->active_slave_count; i++) {
1348                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1349
1350                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1351                                         wait_to_complete);
1352                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1353                                 link_up = 1;
1354                                 break;
1355                         }
1356                 }
1357
1358                 bonded_eth_dev->data->dev_link.link_status = link_up;
1359         }
1360
1361         return 0;
1362 }
1363
1364 static void
1365 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1366 {
1367         struct bond_dev_private *internals = dev->data->dev_private;
1368         struct rte_eth_stats slave_stats;
1369         int i;
1370
1371         for (i = 0; i < internals->slave_count; i++) {
1372                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1373
1374                 stats->ipackets += slave_stats.ipackets;
1375                 stats->opackets += slave_stats.opackets;
1376                 stats->ibytes += slave_stats.ibytes;
1377                 stats->obytes += slave_stats.obytes;
1378                 stats->ierrors += slave_stats.ierrors;
1379                 stats->oerrors += slave_stats.oerrors;
1380                 stats->imcasts += slave_stats.imcasts;
1381                 stats->rx_nombuf += slave_stats.rx_nombuf;
1382                 stats->fdirmatch += slave_stats.fdirmatch;
1383                 stats->fdirmiss += slave_stats.fdirmiss;
1384                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1385                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1386                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1387                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1388         }
1389 }
1390
1391 static void
1392 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1393 {
1394         struct bond_dev_private *internals = dev->data->dev_private;
1395         int i;
1396
1397         for (i = 0; i < internals->slave_count; i++)
1398                 rte_eth_stats_reset(internals->slaves[i].port_id);
1399 }
1400
/* Enable promiscuous mode on the bonded device and propagate it to slaves
 * according to the bonding mode: all slaves in all-active modes, only the
 * current primary in active/backup style modes, and nothing in mode 4
 * (handled there on slave add/remove). */
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	/* Remember the setting so it can be re-applied on device start */
	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
#ifdef RTE_MBUF_REFCNT
	case BONDING_MODE_BROADCAST:
#endif
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}
1429
/* Disable promiscuous mode on the bonded device and propagate it to slaves,
 * mirroring bond_ethdev_promiscuous_enable(): all slaves in all-active
 * modes, only the current primary in active/backup style modes, nothing in
 * mode 4 (handled there on slave add/remove). */
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	/* Remember the setting so it is not re-applied on device start */
	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
#ifdef RTE_MBUF_REFCNT
	case BONDING_MODE_BROADCAST:
#endif
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
1458
1459 static void
1460 bond_ethdev_delayed_lsc_propagation(void *arg)
1461 {
1462         if (arg == NULL)
1463                 return;
1464
1465         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1466                         RTE_ETH_EVENT_INTR_LSC);
1467 }
1468
1469 void
1470 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1471                 void *param)
1472 {
1473         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1474         struct bond_dev_private *internals;
1475         struct rte_eth_link link;
1476
1477         int i, valid_slave = 0;
1478         uint8_t active_pos;
1479         uint8_t lsc_flag = 0;
1480
1481         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1482                 return;
1483
1484         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1485         slave_eth_dev = &rte_eth_devices[port_id];
1486
1487         if (valid_bonded_ethdev(bonded_eth_dev))
1488                 return;
1489
1490         internals = bonded_eth_dev->data->dev_private;
1491
1492         /* If the device isn't started don't handle interrupts */
1493         if (!bonded_eth_dev->data->dev_started)
1494                 return;
1495
1496         /* verify that port_id is a valid slave of bonded port */
1497         for (i = 0; i < internals->slave_count; i++) {
1498                 if (internals->slaves[i].port_id == port_id) {
1499                         valid_slave = 1;
1500                         break;
1501                 }
1502         }
1503
1504         if (!valid_slave)
1505                 return;
1506
1507         /* Search for port in active port list */
1508         active_pos = find_slave_by_id(internals->active_slaves,
1509                         internals->active_slave_count, port_id);
1510
1511         rte_eth_link_get_nowait(port_id, &link);
1512         if (link.link_status) {
1513                 if (active_pos < internals->active_slave_count)
1514                         return;
1515
1516                 /* if no active slave ports then set this port to be primary port */
1517                 if (internals->active_slave_count < 1) {
1518                         /* If first active slave, then change link status */
1519                         bonded_eth_dev->data->dev_link.link_status = 1;
1520                         internals->current_primary_port = port_id;
1521                         lsc_flag = 1;
1522
1523                         mac_address_slaves_update(bonded_eth_dev);
1524
1525                         /* Inherit eth dev link properties from first active slave */
1526                         link_properties_set(bonded_eth_dev,
1527                                         &(slave_eth_dev->data->dev_link));
1528                 }
1529
1530                 activate_slave(bonded_eth_dev, port_id);
1531
1532                 /* If user has defined the primary port then default to using it */
1533                 if (internals->user_defined_primary_port &&
1534                                 internals->primary_port == port_id)
1535                         bond_ethdev_primary_set(internals, port_id);
1536         } else {
1537                 if (active_pos == internals->active_slave_count)
1538                         return;
1539
1540                 /* Remove from active slave list */
1541                 deactivate_slave(bonded_eth_dev, port_id);
1542
1543                 /* No active slaves, change link status to down and reset other
1544                  * link properties */
1545                 if (internals->active_slave_count < 1) {
1546                         lsc_flag = 1;
1547                         bonded_eth_dev->data->dev_link.link_status = 0;
1548
1549                         link_properties_reset(bonded_eth_dev);
1550                 }
1551
1552                 /* Update primary id, take first active slave from list or if none
1553                  * available set to -1 */
1554                 if (port_id == internals->current_primary_port) {
1555                         if (internals->active_slave_count > 0)
1556                                 bond_ethdev_primary_set(internals,
1557                                                 internals->active_slaves[0]);
1558                         else
1559                                 internals->current_primary_port = internals->primary_port;
1560                 }
1561         }
1562
1563         if (lsc_flag) {
1564                 /* Cancel any possible outstanding interrupts if delays are enabled */
1565                 if (internals->link_up_delay_ms > 0 ||
1566                         internals->link_down_delay_ms > 0)
1567                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
1568                                         bonded_eth_dev);
1569
1570                 if (bonded_eth_dev->data->dev_link.link_status) {
1571                         if (internals->link_up_delay_ms > 0)
1572                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
1573                                                 bond_ethdev_delayed_lsc_propagation,
1574                                                 (void *)bonded_eth_dev);
1575                         else
1576                                 _rte_eth_dev_callback_process(bonded_eth_dev,
1577                                                 RTE_ETH_EVENT_INTR_LSC);
1578
1579                 } else {
1580                         if (internals->link_down_delay_ms > 0)
1581                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
1582                                                 bond_ethdev_delayed_lsc_propagation,
1583                                                 (void *)bonded_eth_dev);
1584                         else
1585                                 _rte_eth_dev_callback_process(bonded_eth_dev,
1586                                                 RTE_ETH_EVENT_INTR_LSC);
1587                 }
1588         }
1589 }
1590
/* Ethdev operations table installed on every bonded device.  Each entry
 * dispatches the generic rte_ethdev call to the bonding implementation,
 * which typically fans the operation out to the slave ports.
 * NOTE(review): not declared static — presumably referenced from another
 * file of this PMD (e.g. the bonding API); confirm before narrowing
 * linkage. */
struct eth_dev_ops default_dev_ops = {
		.dev_start = bond_ethdev_start,
		.dev_stop = bond_ethdev_stop,
		.dev_close = bond_ethdev_close,
		.dev_configure = bond_ethdev_configure,
		.dev_infos_get = bond_ethdev_info,
		.rx_queue_setup = bond_ethdev_rx_queue_setup,
		.tx_queue_setup = bond_ethdev_tx_queue_setup,
		.rx_queue_release = bond_ethdev_rx_queue_release,
		.tx_queue_release = bond_ethdev_tx_queue_release,
		.link_update = bond_ethdev_link_update,
		.stats_get = bond_ethdev_stats_get,
		.stats_reset = bond_ethdev_stats_reset,
		.promiscuous_enable = bond_ethdev_promiscuous_enable,
		.promiscuous_disable = bond_ethdev_promiscuous_disable
};
1607
1608 static int
1609 bond_init(const char *name, const char *params)
1610 {
1611         struct bond_dev_private *internals;
1612         struct rte_kvargs *kvlist;
1613         uint8_t bonding_mode, socket_id;
1614         int  arg_count, port_id;
1615
1616         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
1617
1618         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
1619         if (kvlist == NULL)
1620                 return -1;
1621
1622         /* Parse link bonding mode */
1623         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
1624                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
1625                                 &bond_ethdev_parse_slave_mode_kvarg,
1626                                 &bonding_mode) != 0) {
1627                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
1628                                         name);
1629                         goto parse_error;
1630                 }
1631         } else {
1632                 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
1633                                 "device %s\n", name);
1634                 goto parse_error;
1635         }
1636
1637         /* Parse socket id to create bonding device on */
1638         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
1639         if (arg_count == 1) {
1640                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
1641                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
1642                                 != 0) {
1643                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
1644                                         "bonded device %s\n", name);
1645                         goto parse_error;
1646                 }
1647         } else if (arg_count > 1) {
1648                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
1649                                 "bonded device %s\n", name);
1650                 goto parse_error;
1651         } else {
1652                 socket_id = rte_socket_id();
1653         }
1654
1655         /* Create link bonding eth device */
1656         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
1657         if (port_id < 0) {
1658                 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
1659                                 "socket %u.\n", name, bonding_mode, socket_id);
1660                 goto parse_error;
1661         }
1662         internals = rte_eth_devices[port_id].data->dev_private;
1663         internals->kvlist = kvlist;
1664
1665         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
1666                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
1667         return 0;
1668
1669 parse_error:
1670         rte_kvargs_free(kvlist);
1671
1672         return -1;
1673 }
1674
1675 /* this part will resolve the slave portids after all the other pdev and vdev
1676  * have been allocated */
1677 static int
1678 bond_ethdev_configure(struct rte_eth_dev *dev)
1679 {
1680         char *name = dev->data->name;
1681         struct bond_dev_private *internals = dev->data->dev_private;
1682         struct rte_kvargs *kvlist = internals->kvlist;
1683         int arg_count, port_id = dev - rte_eth_devices;
1684
1685         /*
1686          * if no kvlist, it means that this bonded device has been created
1687          * through the bonding api.
1688          */
1689         if (!kvlist)
1690                 return 0;
1691
1692         /* Parse MAC address for bonded device */
1693         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
1694         if (arg_count == 1) {
1695                 struct ether_addr bond_mac;
1696
1697                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
1698                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
1699                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
1700                                         name);
1701                         return -1;
1702                 }
1703
1704                 /* Set MAC address */
1705                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
1706                         RTE_LOG(ERR, EAL,
1707                                         "Failed to set mac address on bonded device %s\n",
1708                                         name);
1709                         return -1;
1710                 }
1711         } else if (arg_count > 1) {
1712                 RTE_LOG(ERR, EAL,
1713                                 "MAC address can be specified only once for bonded device %s\n",
1714                                 name);
1715                 return -1;
1716         }
1717
1718         /* Parse/set balance mode transmit policy */
1719         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
1720         if (arg_count == 1) {
1721                 uint8_t xmit_policy;
1722
1723                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
1724                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
1725                                                 0) {
1726                         RTE_LOG(INFO, EAL,
1727                                         "Invalid xmit policy specified for bonded device %s\n",
1728                                         name);
1729                         return -1;
1730                 }
1731
1732                 /* Set balance mode transmit policy*/
1733                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
1734                         RTE_LOG(ERR, EAL,
1735                                         "Failed to set balance xmit policy on bonded device %s\n",
1736                                         name);
1737                         return -1;
1738                 }
1739         } else if (arg_count > 1) {
1740                 RTE_LOG(ERR, EAL,
1741                                 "Transmit policy can be specified only once for bonded device"
1742                                 " %s\n", name);
1743                 return -1;
1744         }
1745
1746         /* Parse/add slave ports to bonded device */
1747         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
1748                 struct bond_ethdev_slave_ports slave_ports;
1749                 unsigned i;
1750
1751                 memset(&slave_ports, 0, sizeof(slave_ports));
1752
1753                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
1754                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
1755                         RTE_LOG(ERR, EAL,
1756                                         "Failed to parse slave ports for bonded device %s\n",
1757                                         name);
1758                         return -1;
1759                 }
1760
1761                 for (i = 0; i < slave_ports.slave_count; i++) {
1762                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
1763                                 RTE_LOG(ERR, EAL,
1764                                                 "Failed to add port %d as slave to bonded device %s\n",
1765                                                 slave_ports.slaves[i], name);
1766                         }
1767                 }
1768
1769         } else {
1770                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
1771                 return -1;
1772         }
1773
1774         /* Parse/set primary slave port id*/
1775         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
1776         if (arg_count == 1) {
1777                 uint8_t primary_slave_port_id;
1778
1779                 if (rte_kvargs_process(kvlist,
1780                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
1781                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
1782                                 &primary_slave_port_id) < 0) {
1783                         RTE_LOG(INFO, EAL,
1784                                         "Invalid primary slave port id specified for bonded device"
1785                                         " %s\n", name);
1786                         return -1;
1787                 }
1788
1789                 /* Set balance mode transmit policy*/
1790                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
1791                                 != 0) {
1792                         RTE_LOG(ERR, EAL,
1793                                         "Failed to set primary slave port %d on bonded device %s\n",
1794                                         primary_slave_port_id, name);
1795                         return -1;
1796                 }
1797         } else if (arg_count > 1) {
1798                 RTE_LOG(INFO, EAL,
1799                                 "Primary slave can be specified only once for bonded device"
1800                                 " %s\n", name);
1801                 return -1;
1802         }
1803
1804         /* Parse link status monitor polling interval */
1805         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
1806         if (arg_count == 1) {
1807                 uint32_t lsc_poll_interval_ms;
1808
1809                 if (rte_kvargs_process(kvlist,
1810                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
1811                                 &bond_ethdev_parse_time_ms_kvarg,
1812                                 &lsc_poll_interval_ms) < 0) {
1813                         RTE_LOG(INFO, EAL,
1814                                         "Invalid lsc polling interval value specified for bonded"
1815                                         " device %s\n", name);
1816                         return -1;
1817                 }
1818
1819                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
1820                                 != 0) {
1821                         RTE_LOG(ERR, EAL,
1822                                         "Failed to set lsc monitor polling interval (%u ms) on"
1823                                         " bonded device %s\n", lsc_poll_interval_ms, name);
1824                         return -1;
1825                 }
1826         } else if (arg_count > 1) {
1827                 RTE_LOG(INFO, EAL,
1828                                 "LSC polling interval can be specified only once for bonded"
1829                                 " device %s\n", name);
1830                 return -1;
1831         }
1832
1833         /* Parse link up interrupt propagation delay */
1834         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
1835         if (arg_count == 1) {
1836                 uint32_t link_up_delay_ms;
1837
1838                 if (rte_kvargs_process(kvlist,
1839                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
1840                                 &bond_ethdev_parse_time_ms_kvarg,
1841                                 &link_up_delay_ms) < 0) {
1842                         RTE_LOG(INFO, EAL,
1843                                         "Invalid link up propagation delay value specified for"
1844                                         " bonded device %s\n", name);
1845                         return -1;
1846                 }
1847
1848                 /* Set balance mode transmit policy*/
1849                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
1850                                 != 0) {
1851                         RTE_LOG(ERR, EAL,
1852                                         "Failed to set link up propagation delay (%u ms) on bonded"
1853                                         " device %s\n", link_up_delay_ms, name);
1854                         return -1;
1855                 }
1856         } else if (arg_count > 1) {
1857                 RTE_LOG(INFO, EAL,
1858                                 "Link up propagation delay can be specified only once for"
1859                                 " bonded device %s\n", name);
1860                 return -1;
1861         }
1862
1863         /* Parse link down interrupt propagation delay */
1864         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
1865         if (arg_count == 1) {
1866                 uint32_t link_down_delay_ms;
1867
1868                 if (rte_kvargs_process(kvlist,
1869                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
1870                                 &bond_ethdev_parse_time_ms_kvarg,
1871                                 &link_down_delay_ms) < 0) {
1872                         RTE_LOG(INFO, EAL,
1873                                         "Invalid link down propagation delay value specified for"
1874                                         " bonded device %s\n", name);
1875                         return -1;
1876                 }
1877
1878                 /* Set balance mode transmit policy*/
1879                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
1880                                 != 0) {
1881                         RTE_LOG(ERR, EAL,
1882                                         "Failed to set link down propagation delay (%u ms) on"
1883                                         " bonded device %s\n", link_down_delay_ms, name);
1884                         return -1;
1885                 }
1886         } else if (arg_count > 1) {
1887                 RTE_LOG(INFO, EAL,
1888                                 "Link down propagation delay can be specified only once for"
1889                                 " bonded device %s\n", name);
1890                 return -1;
1891         }
1892
1893         return 0;
1894 }
1895
/* Virtual-device driver registration: bond_init() is invoked once for each
 * "eth_bond" vdev given on the EAL command line. */
static struct rte_driver bond_drv = {
	.name = "eth_bond",
	.type = PMD_VDEV,
	.init = bond_init,
};

PMD_REGISTER_DRIVER(bond_drv);