bond: support link status polling
[dpdk.git] / lib / librte_pmd_bond / rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <rte_mbuf.h>
35 #include <rte_malloc.h>
36 #include <rte_ethdev.h>
37 #include <rte_tcp.h>
38 #include <rte_udp.h>
39 #include <rte_ip.h>
40 #include <rte_devargs.h>
41 #include <rte_kvargs.h>
42 #include <rte_dev.h>
43 #include <rte_alarm.h>
44
45 #include "rte_eth_bond.h"
46 #include "rte_eth_bond_private.h"
47
48 static uint16_t
49 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
50 {
51         struct bond_dev_private *internals;
52
53         uint16_t num_rx_slave = 0;
54         uint16_t num_rx_total = 0;
55
56         int i;
57
58         /* Cast to structure, containing bonded device's port id and queue id */
59         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
60
61         internals = bd_rx_q->dev_private;
62
63
64         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
65                 /* Offset of pointer to *bufs increases as packets are received
66                  * from other slaves */
67                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
68                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
69                 if (num_rx_slave) {
70                         num_rx_total += num_rx_slave;
71                         nb_pkts -= num_rx_slave;
72                 }
73         }
74
75         return num_rx_total;
76 }
77
78 static uint16_t
79 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
80                 uint16_t nb_pkts)
81 {
82         struct bond_dev_private *internals;
83
84         /* Cast to structure, containing bonded device's port id and queue id */
85         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
86
87         internals = bd_rx_q->dev_private;
88
89         return rte_eth_rx_burst(internals->current_primary_port,
90                         bd_rx_q->queue_id, bufs, nb_pkts);
91 }
92
/* Transmit burst in round-robin mode: packets are dealt one-by-one across
 * the active slaves, and any packets a slave fails to send are moved to the
 * tail of the caller's bufs array so the caller can retry them. Returns the
 * number of packets actually transmitted. */
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	/* Per-slave staging arrays; the VLA is sized by the caller's burst,
	 * so a large nb_pkts means significant stack usage */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	/* NOTE(review): this function-static cursor is shared across ALL
	 * bonded devices and ALL TX queues, so concurrent callers race on it
	 * and the distribution is only approximately round-robin — confirm
	 * whether per-queue state is intended here */
	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it  */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs:
			 * unsent mbuf pointers are packed at the tail of the
			 * caller's array, growing downward as failures
			 * accumulate across slaves */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave], tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
154
155 static uint16_t
156 bond_ethdev_tx_burst_active_backup(void *queue,
157                 struct rte_mbuf **bufs, uint16_t nb_pkts)
158 {
159         struct bond_dev_private *internals;
160         struct bond_tx_queue *bd_tx_q;
161
162         bd_tx_q = (struct bond_tx_queue *)queue;
163         internals = bd_tx_q->dev_private;
164
165         if (internals->active_slave_count < 1)
166                 return 0;
167
168         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
169                         bufs, nb_pkts);
170 }
171
172 static inline uint16_t
173 ether_hash(struct ether_hdr *eth_hdr)
174 {
175         uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes;
176         uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes;
177
178         return (word_src_addr[0] ^ word_dst_addr[0]) ^
179                         (word_src_addr[1] ^ word_dst_addr[1]) ^
180                         (word_src_addr[2] ^ word_dst_addr[2]);
181 }
182
183 static inline uint32_t
184 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
185 {
186         return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
187 }
188
189 static inline uint32_t
190 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
191 {
192         uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]);
193         uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
194
195         return (word_src_addr[0] ^ word_dst_addr[0]) ^
196                         (word_src_addr[1] ^ word_dst_addr[1]) ^
197                         (word_src_addr[2] ^ word_dst_addr[2]) ^
198                         (word_src_addr[3] ^ word_dst_addr[3]);
199 }
200
201 static uint32_t
202 udp_hash(struct udp_hdr *hdr)
203 {
204         return hdr->src_port ^ hdr->dst_port;
205 }
206
/* Select the output slave index ([0, slave_count)) for a packet according to
 * the configured transmit balance policy: layer 2 (MAC), layer 2+3 (MAC+IP)
 * or layer 3+4 (IP+UDP ports). */
static inline uint16_t
xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
{
	struct ether_hdr *eth_hdr;
	struct udp_hdr *udp_hdr;
	size_t eth_offset = 0;
	uint32_t hash = 0;

	/* Single slave: no hashing needed */
	if (slave_count == 1)
		return 0;

	switch (policy) {
	case BALANCE_XMIT_POLICY_LAYER2:
		eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

		/* Layer-2 path folds and returns directly (16-bit hash, only
		 * one >>8 fold needed) */
		hash = ether_hash(eth_hdr);
		hash ^= hash >> 8;
		return hash % slave_count;

	case BALANCE_XMIT_POLICY_LAYER23:
		eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

		/* Skip the VLAN tag when the NIC flagged one; assumes a
		 * single tag — NOTE(review): QinQ (double-tagged) frames
		 * would need a larger offset, confirm if relevant */
		if (buf->ol_flags & PKT_RX_VLAN_PKT)
			eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
		else
			eth_offset = sizeof(struct ether_hdr);

		if (buf->ol_flags & PKT_RX_IPV4_HDR) {
			struct ipv4_hdr *ipv4_hdr;
			ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(buf,
					unsigned char *) + eth_offset);

			hash = ether_hash(eth_hdr) ^ ipv4_hash(ipv4_hdr);

		} else {
			/* NOTE(review): any non-IPv4 packet is treated as
			 * IPv6 here — non-IP frames would be hashed over
			 * garbage header bytes; confirm callers only pass IP
			 * traffic for this policy */
			struct ipv6_hdr *ipv6_hdr;

			ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(buf,
					unsigned char *) + eth_offset);

			hash = ether_hash(eth_hdr) ^ ipv6_hash(ipv6_hdr);
		}
		break;

	case BALANCE_XMIT_POLICY_LAYER34:
		if (buf->ol_flags & PKT_RX_VLAN_PKT)
			eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
		else
			eth_offset = sizeof(struct ether_hdr);

		if (buf->ol_flags & PKT_RX_IPV4_HDR) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);

			/* Only UDP ports contribute at layer 4; TCP and other
			 * protocols fall back to an IP-only hash */
			if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)
						(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
								sizeof(struct ipv4_hdr));
				hash = ipv4_hash(ipv4_hdr) ^ udp_hash(udp_hdr);
			} else {
				hash = ipv4_hash(ipv4_hdr);
			}
		} else {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);

			if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)
						(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
								sizeof(struct ipv6_hdr));
				hash = ipv6_hash(ipv6_hdr) ^ udp_hash(udp_hdr);
			} else {
				hash = ipv6_hash(ipv6_hdr);
			}
		}
		break;
	}

	/* Fold the 32-bit hash down before taking the modulus */
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
290
291 static uint16_t
292 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
293                 uint16_t nb_pkts)
294 {
295         struct bond_dev_private *internals;
296         struct bond_tx_queue *bd_tx_q;
297
298         uint8_t num_of_slaves;
299         uint8_t slaves[RTE_MAX_ETHPORTS];
300
301         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
302
303         int i, op_slave_id;
304
305         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
306         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
307
308         bd_tx_q = (struct bond_tx_queue *)queue;
309         internals = bd_tx_q->dev_private;
310
311         /* Copy slave list to protect against slave up/down changes during tx
312          * bursting */
313         num_of_slaves = internals->active_slave_count;
314         memcpy(slaves, internals->active_slaves,
315                         sizeof(internals->active_slaves[0]) * num_of_slaves);
316
317         if (num_of_slaves < 1)
318                 return num_tx_total;
319
320         /* Populate slaves mbuf with the packets which are to be sent on it  */
321         for (i = 0; i < nb_pkts; i++) {
322                 /* Select output slave using hash based on xmit policy */
323                 op_slave_id = xmit_slave_hash(bufs[i], num_of_slaves,
324                                 internals->balance_xmit_policy);
325
326                 /* Populate slave mbuf arrays with mbufs for that slave */
327                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
328         }
329
330         /* Send packet burst on each slave device */
331         for (i = 0; i < num_of_slaves; i++) {
332                 if (slave_nb_pkts[i] > 0) {
333                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
334                                         slave_bufs[i], slave_nb_pkts[i]);
335
336                         /* if tx burst fails move packets to end of bufs */
337                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
338                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
339
340                                 tx_fail_total += slave_tx_fail_count;
341                                 memcpy(bufs[nb_pkts - tx_fail_total],
342                                                 slave_bufs[i][num_tx_slave], slave_tx_fail_count);
343                         }
344
345                         num_tx_total += num_tx_slave;
346                 }
347         }
348
349
350         return num_tx_total;
351 }
352
#ifdef RTE_MBUF_REFCNT
/* Transmit burst in broadcast mode: every packet is sent on every active
 * slave. Returns the packet count of the most successful slave, which is
 * what the caller perceives as "transmitted". */
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs: each of the N slaves consumes
	 * one reference on successful transmit, and the caller already holds
	 * one, so add N-1 */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				/* drop the extra reference taken above for
				 * every packet this slave did not send */
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
#endif
414
415 void
416 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
417                 struct rte_eth_link *slave_dev_link)
418 {
419         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
420         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
421
422         if (slave_dev_link->link_status &&
423                 bonded_eth_dev->data->dev_started) {
424                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
425                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
426
427                 internals->link_props_set = 1;
428         }
429 }
430
431 void
432 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
433 {
434         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
435
436         memset(&(bonded_eth_dev->data->dev_link), 0,
437                         sizeof(bonded_eth_dev->data->dev_link));
438
439         internals->link_props_set = 0;
440 }
441
442 int
443 link_properties_valid(struct rte_eth_link *bonded_dev_link,
444                 struct rte_eth_link *slave_dev_link)
445 {
446         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
447                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
448                 return -1;
449
450         return 0;
451 }
452
453 int
454 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
455 {
456         struct ether_addr *mac_addr;
457
458         mac_addr = eth_dev->data->mac_addrs;
459
460         if (eth_dev == NULL) {
461                 RTE_BOND_LOG(ERR,  "NULL pointer eth_dev specified");
462                 return -1;
463         }
464
465         if (new_mac_addr == NULL) {
466                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
467                 return -1;
468         }
469
470         /* If new MAC is different to current MAC then update */
471         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
472                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
473
474         return 0;
475 }
476
477 int
478 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
479 {
480         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
481         int i;
482
483         /* Update slave devices MAC addresses */
484         if (internals->slave_count < 1)
485                 return -1;
486
487         switch (internals->mode) {
488         case BONDING_MODE_ROUND_ROBIN:
489         case BONDING_MODE_BALANCE:
490 #ifdef RTE_MBUF_REFCNT
491         case BONDING_MODE_BROADCAST:
492 #endif
493                 for (i = 0; i < internals->slave_count; i++) {
494                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
495                                         bonded_eth_dev->data->mac_addrs)) {
496                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
497                                                 internals->slaves[i].port_id);
498                                 return -1;
499                         }
500                 }
501                 break;
502         case BONDING_MODE_ACTIVE_BACKUP:
503         default:
504                 for (i = 0; i < internals->slave_count; i++) {
505                         if (internals->slaves[i].port_id ==
506                                         internals->current_primary_port) {
507                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
508                                                 bonded_eth_dev->data->mac_addrs)) {
509                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
510                                                         internals->current_primary_port);
511                                         return -1;
512                                 }
513                         } else {
514                                 if (mac_address_set(
515                                                 &rte_eth_devices[internals->slaves[i].port_id],
516                                                 &internals->slaves[i].persisted_mac_addr)) {
517                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
518                                                         internals->slaves[i].port_id);
519                                         return -1;
520                                 }
521                         }
522                 }
523         }
524
525         return 0;
526 }
527
528 int
529 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
530 {
531         struct bond_dev_private *internals;
532
533         internals = eth_dev->data->dev_private;
534
535         switch (mode) {
536         case BONDING_MODE_ROUND_ROBIN:
537                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
538                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
539                 break;
540         case BONDING_MODE_ACTIVE_BACKUP:
541                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
542                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
543                 break;
544         case BONDING_MODE_BALANCE:
545                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
546                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
547                 break;
548 #ifdef RTE_MBUF_REFCNT
549         case BONDING_MODE_BROADCAST:
550                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
551                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
552                 break;
553 #endif
554         default:
555                 return -1;
556         }
557
558         internals->mode = mode;
559
560         return 0;
561 }
562
/* (Re)configure a slave device to mirror the bonded device's configuration:
 * stop it, configure it with the bonded device's queue counts, set up each
 * RX/TX queue from the stored bonded queue parameters, then start it.
 * Returns 0 on success, or a negative value / the failing API's error code
 * on failure. */
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;

	int errval, q_id;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* Configure device with the bonded device's queue counts and the
	 * slave's own dev_conf */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			bonded_eth_dev->data->nb_rx_queues,
			bonded_eth_dev->data->nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues: replicate the bonded device's stored per-queue
	 * descriptor counts, rx_conf and mempool onto the slave */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	return 0;
}
632
633 void
634 slave_remove(struct bond_dev_private *internals,
635                 struct rte_eth_dev *slave_eth_dev)
636 {
637         int i, found = 0;
638
639         for (i = 0; i < internals->slave_count; i++) {
640                 if (internals->slaves[i].port_id ==     slave_eth_dev->data->port_id)
641                         found = 1;
642
643                 if (found && i < (internals->slave_count - 1))
644                         memcpy(&internals->slaves[i], &internals->slaves[i+1],
645                                         sizeof(internals->slaves[i]));
646         }
647
648         internals->slave_count--;
649 }
650
/* forward declaration: periodic link-status poll callback (defined below) */
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

/* Append a slave device to the bonded device's slave array and record its
 * initial state (link down, original MAC persisted for later restore). For
 * slaves whose driver cannot raise link-state-change interrupts, enables
 * link-status polling and arms the polling alarm if it is not already
 * running for this bonded device. */
void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* If slave device doesn't support interrupts then we need to enabled
	 * polling to monitor link status */
	if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;

		/* Arm one alarm per bonded device, first poll-needing slave
		 * only; the interval is stored in ms and rte_eal_alarm_set
		 * takes microseconds, hence the * 1000 */
		if (!internals->link_status_polling_enabled) {
			internals->link_status_polling_enabled = 1;

			rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
					bond_ethdev_slave_link_status_change_monitor,
					(void *)&rte_eth_devices[internals->port_id]);
		}
	}

	slave_details->link_status_wait_to_complete = 0;

	/* Keep the slave's original MAC so it can be restored on removal or
	 * used in active-backup mode */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
683
684 void
685 bond_ethdev_primary_set(struct bond_dev_private *internals,
686                 uint8_t slave_port_id)
687 {
688         int i;
689
690         if (internals->active_slave_count < 1)
691                 internals->current_primary_port = slave_port_id;
692         else
693                 /* Search bonded device slave ports for new proposed primary port */
694                 for (i = 0; i < internals->active_slave_count; i++) {
695                         if (internals->active_slaves[i] == slave_port_id)
696                                 internals->current_primary_port = slave_port_id;
697                 }
698 }
699
/* forward declaration (defined later in this file) */
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

/* dev_ops start handler for the bonded device: validates that slaves exist,
 * resolves the bonded MAC (primary slave's MAC unless user-defined), pushes
 * MACs and promiscuous state down to the slaves, reconfigures/starts every
 * slave, and applies any user-defined primary port. Returns 0 on success,
 * -1 on any failure. */
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (valid_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	/* Link starts down; it comes up as slaves report link */
	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	/* Without a user-defined MAC, adopt the primary slave's persisted
	 * MAC as the bonded device's address */
	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		/* Primary port not found among slaves: cannot start */
		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs*/
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configure in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		if (slave_configure(eth_dev,
				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
			RTE_BOND_LOG(ERR,
					"bonded port (%d) failed to reconfigure slave device (%d)",
					eth_dev->data->port_id, internals->slaves[i].port_id);
			return -1;
		}
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	return 0;
}
767
768 static void
769 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
770 {
771         struct bond_dev_private *internals = eth_dev->data->dev_private;
772
773         internals->active_slave_count = 0;
774         internals->link_status_polling_enabled = 0;
775
776         eth_dev->data->dev_link.link_status = 0;
777         eth_dev->data->dev_started = 0;
778 }
779
/* dev_ops close handler: intentionally a no-op — the bonded device holds no
 * resources that need releasing here. */
static void
bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
{
}
784
785 /* forward declaration */
786 static int bond_ethdev_configure(struct rte_eth_dev *dev);
787
788 static void
789 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
790 {
791         dev_info->driver_name = driver_name;
792         dev_info->max_mac_addrs = 1;
793
794         dev_info->max_rx_pktlen = (uint32_t)2048;
795
796         dev_info->max_rx_queues = (uint16_t)128;
797         dev_info->max_tx_queues = (uint16_t)512;
798
799         dev_info->min_rx_bufsize = 0;
800         dev_info->pci_dev = dev->pci_dev;
801 }
802
803 static int
804 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
805                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
806                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
807 {
808         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
809                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
810                                         0, dev->pci_dev->numa_node);
811         if (bd_rx_q == NULL)
812                 return -1;
813
814         bd_rx_q->queue_id = rx_queue_id;
815         bd_rx_q->dev_private = dev->data->dev_private;
816
817         bd_rx_q->nb_rx_desc = nb_rx_desc;
818
819         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
820         bd_rx_q->mb_pool = mb_pool;
821
822         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
823
824         return 0;
825 }
826
827 static int
828 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
829                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
830                 const struct rte_eth_txconf *tx_conf)
831 {
832         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
833                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
834                                         0, dev->pci_dev->numa_node);
835
836         if (bd_tx_q == NULL)
837                         return -1;
838
839         bd_tx_q->queue_id = tx_queue_id;
840         bd_tx_q->dev_private = dev->data->dev_private;
841
842         bd_tx_q->nb_tx_desc = nb_tx_desc;
843         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
844
845         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
846
847         return 0;
848 }
849
/* Free an RX queue allocated by bond_ethdev_rx_queue_setup().
 * The explicit NULL guard is kept although rte_free(NULL) is documented
 * as a no-op. */
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
858
/* Free a TX queue allocated by bond_ethdev_tx_queue_setup().
 * The explicit NULL guard is kept although rte_free(NULL) is documented
 * as a no-op. */
static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
867
868
/*
 * Periodic alarm callback that polls the link status of every slave with
 * polling enabled. When a slave's link status differs from the last
 * observed value, the LSC event callback is invoked for that slave. The
 * alarm re-arms itself while any poll-enabled slave exists (or while the
 * internals lock could not be taken, so a configure in progress does not
 * kill the polling chain).
 */
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	/* Stop re-arming once the device is stopped or polling is disabled. */
	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				/* param is the bonded port id so the callback can map the
				 * slave event back to this bonded device */
				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdev's */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
926
927 static int
928 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
929                 int wait_to_complete)
930 {
931         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
932
933         if (!bonded_eth_dev->data->dev_started ||
934                 internals->active_slave_count == 0) {
935                 bonded_eth_dev->data->dev_link.link_status = 0;
936                 return 0;
937         } else {
938                 struct rte_eth_dev *slave_eth_dev;
939                 int i, link_up = 0;
940
941                 for (i = 0; i < internals->active_slave_count; i++) {
942                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
943
944                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
945                                         wait_to_complete);
946                         if (slave_eth_dev->data->dev_link.link_status == 1) {
947                                 link_up = 1;
948                                 break;
949                         }
950                 }
951
952                 bonded_eth_dev->data->dev_link.link_status = link_up;
953         }
954
955         return 0;
956 }
957
958 static void
959 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
960 {
961         struct bond_dev_private *internals = dev->data->dev_private;
962         struct rte_eth_stats slave_stats;
963
964         int i;
965
966         /* clear bonded stats before populating from slaves */
967         memset(stats, 0, sizeof(*stats));
968
969         for (i = 0; i < internals->slave_count; i++) {
970                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
971
972                 stats->ipackets += slave_stats.ipackets;
973                 stats->opackets += slave_stats.opackets;
974                 stats->ibytes += slave_stats.ibytes;
975                 stats->obytes += slave_stats.obytes;
976                 stats->ierrors += slave_stats.ierrors;
977                 stats->oerrors += slave_stats.oerrors;
978                 stats->imcasts += slave_stats.imcasts;
979                 stats->rx_nombuf += slave_stats.rx_nombuf;
980                 stats->fdirmatch += slave_stats.fdirmatch;
981                 stats->fdirmiss += slave_stats.fdirmiss;
982                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
983                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
984                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
985                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
986         }
987 }
988
989 static void
990 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
991 {
992         struct bond_dev_private *internals = dev->data->dev_private;
993         int i;
994
995         for (i = 0; i < internals->slave_count; i++)
996                 rte_eth_stats_reset(internals->slaves[i].port_id);
997 }
998
999 static void
1000 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1001 {
1002         struct bond_dev_private *internals = eth_dev->data->dev_private;
1003         int i;
1004
1005         internals->promiscuous_en = 1;
1006
1007         switch (internals->mode) {
1008         /* Promiscuous mode is propagated to all slaves */
1009         case BONDING_MODE_ROUND_ROBIN:
1010         case BONDING_MODE_BALANCE:
1011 #ifdef RTE_MBUF_REFCNT
1012         case BONDING_MODE_BROADCAST:
1013 #endif
1014                 for (i = 0; i < internals->slave_count; i++)
1015                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1016                 break;
1017         /* Promiscuous mode is propagated only to primary slave */
1018         case BONDING_MODE_ACTIVE_BACKUP:
1019         default:
1020                 rte_eth_promiscuous_enable(internals->current_primary_port);
1021
1022         }
1023 }
1024
1025 static void
1026 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1027 {
1028         struct bond_dev_private *internals = dev->data->dev_private;
1029         int i;
1030
1031         internals->promiscuous_en = 0;
1032
1033         switch (internals->mode) {
1034         /* Promiscuous mode is propagated to all slaves */
1035         case BONDING_MODE_ROUND_ROBIN:
1036         case BONDING_MODE_BALANCE:
1037 #ifdef RTE_MBUF_REFCNT
1038         case BONDING_MODE_BROADCAST:
1039 #endif
1040                 for (i = 0; i < internals->slave_count; i++)
1041                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1042                 break;
1043         /* Promiscuous mode is propagated only to primary slave */
1044         case BONDING_MODE_ACTIVE_BACKUP:
1045         default:
1046                 rte_eth_promiscuous_disable(internals->current_primary_port);
1047         }
1048 }
1049
1050 static void
1051 bond_ethdev_delayed_lsc_propagation(void *arg)
1052 {
1053         if (arg == NULL)
1054                 return;
1055
1056         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1057                         RTE_ETH_EVENT_INTR_LSC);
1058 }
1059
/*
 * LSC event handler for slave devices of a bonded port.
 *
 * port_id is the slave whose link changed; param points to the bonded
 * port id. Maintains the active slave list, elects/demotes the primary
 * port, inherits or resets bonded link properties, and propagates an LSC
 * event for the bonded device (immediately or via a delayed alarm when
 * link up/down propagation delays are configured).
 */
void
bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
		void *param)
{
	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0, active_pos = -1;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
	slave_eth_dev = &rte_eth_devices[port_id];

	/* NOTE(review): inverted sense — presumably valid_bonded_ethdev()
	 * returns non-zero when the device is NOT a valid bonded device;
	 * confirm against its definition. */
	if (valid_bonded_ethdev(bonded_eth_dev))
		return;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return;

	/* Search for port in active port list */
	for (i = 0; i < internals->active_slave_count; i++) {
		if (port_id == internals->active_slaves[i]) {
			active_pos = i;
			break;
		}
	}

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		/* Already active: nothing to do for a repeated link-up event. */
		if (active_pos >= 0)
			return;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = 1;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			/* Inherit eth dev link properties from first active slave */
			link_properties_set(bonded_eth_dev,
					&(slave_eth_dev->data->dev_link));
		}
		internals->active_slaves[internals->active_slave_count++] = port_id;

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		/* Link down for a port that was not active: nothing to do. */
		if (active_pos < 0)
			return;

		/* Remove from active slave list */
		for (i = active_pos; i < (internals->active_slave_count - 1); i++)
			internals->active_slaves[i] = internals->active_slaves[i+1];

		internals->active_slave_count--;

		/* No active slaves, change link status to down and reset other
		 * link properties */
		if (internals->active_slave_count < 1) {
			lsc_flag = 1;
			bonded_eth_dev->data->dev_link.link_status = 0;

			link_properties_reset(bonded_eth_dev);
		}

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	/* lsc_flag is set only when the bonded link itself changed state. */
	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
			internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			/* Propagate link up after the configured delay, or now. */
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);

		} else {
			/* Propagate link down after the configured delay, or now. */
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		}
	}
}
1184
/* Default ethdev operations table for bonded devices. Non-static because
 * it is referenced from other files of this PMD. */
struct eth_dev_ops default_dev_ops = {
		.dev_start = bond_ethdev_start,
		.dev_stop = bond_ethdev_stop,
		.dev_close = bond_ethdev_close,
		.dev_configure = bond_ethdev_configure,
		.dev_infos_get = bond_ethdev_info,
		.rx_queue_setup = bond_ethdev_rx_queue_setup,
		.tx_queue_setup = bond_ethdev_tx_queue_setup,
		.rx_queue_release = bond_ethdev_rx_queue_release,
		.tx_queue_release = bond_ethdev_tx_queue_release,
		.link_update = bond_ethdev_link_update,
		.stats_get = bond_ethdev_stats_get,
		.stats_reset = bond_ethdev_stats_reset,
		.promiscuous_enable = bond_ethdev_promiscuous_enable,
		.promiscuous_disable = bond_ethdev_promiscuous_disable
};
1201
1202 static int
1203 bond_init(const char *name, const char *params)
1204 {
1205         struct bond_dev_private *internals;
1206         struct rte_kvargs *kvlist;
1207         uint8_t bonding_mode, socket_id;
1208         int  arg_count, port_id;
1209
1210         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
1211
1212         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
1213         if (kvlist == NULL)
1214                 return -1;
1215
1216         /* Parse link bonding mode */
1217         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
1218                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
1219                                 &bond_ethdev_parse_slave_mode_kvarg, &bonding_mode) != 0) {
1220                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n", name);
1221                         return -1;
1222                 }
1223         } else {
1224                 RTE_LOG(ERR, EAL,
1225                                 "Mode must be specified only once for bonded device %s\n",
1226                                 name);
1227                 return -1;
1228         }
1229
1230         /* Parse socket id to create bonding device on */
1231         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
1232         if (arg_count == 1) {
1233                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
1234                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id) != 0) {
1235                         RTE_LOG(ERR, EAL,
1236                                         "Invalid socket Id specified for bonded device %s\n",
1237                                         name);
1238                         return -1;
1239                 }
1240         } else if (arg_count > 1) {
1241                 RTE_LOG(ERR, EAL,
1242                                 "Socket Id can be specified only once for bonded device %s\n",
1243                                 name);
1244                 return -1;
1245         } else {
1246                 socket_id = rte_socket_id();
1247         }
1248
1249         /* Create link bonding eth device */
1250         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
1251         if (port_id < 0) {
1252                 RTE_LOG(ERR, EAL,
1253                                 "Failed to create socket %s in mode %u on socket %u.\n",
1254                                 name, bonding_mode, socket_id);
1255                 return -1;
1256         }
1257         internals = rte_eth_devices[port_id].data->dev_private;
1258         internals->kvlist = kvlist;
1259
1260         RTE_LOG(INFO, EAL,
1261                         "Create bonded device %s on port %d in mode %u on socket %u.\n",
1262                         name, port_id, bonding_mode, socket_id);
1263         return 0;
1264 }
1265
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	char *name = dev->data->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count, port_id = dev - rte_eth_devices;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (!kvlist)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy*/
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse/add slave ports to bonded device (at least one is required) */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		/* A slave that fails to attach is logged but does not abort
		 * configuration of the remaining slaves. */
		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}

	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id*/
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint8_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		/* Set link status monitor polling interval */
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
1486
/* Virtual device driver definition; registration below makes "eth_bond"
 * devices creatable via EAL vdev arguments. */
static struct rte_driver bond_drv = {
	.name = "eth_bond",
	.type = PMD_VDEV,
	.init = bond_init,
};

PMD_REGISTER_DRIVER(bond_drv);