bond: new link bonding library
[dpdk.git] / lib / librte_pmd_bond / rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35 #include <linux/binfmts.h>
36
37 #include <rte_mbuf.h>
38 #include <rte_cycles.h>
39 #include <rte_dev.h>
40 #include <rte_devargs.h>
41 #include <rte_ethdev.h>
42 #include <rte_ip.h>
43 #include <rte_kvargs.h>
44 #include <rte_malloc.h>
45 #include <rte_memcpy.h>
46 #include <rte_memory.h>
47 #include <rte_udp.h>
48
49 #include "rte_eth_bond.h"
50 #include "rte_eth_bond_private.h"
51
52 static uint16_t
53 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
54 {
55         struct bond_dev_private *internals;
56
57         uint16_t num_rx_slave = 0;
58         uint16_t num_rx_total = 0;
59
60         int i;
61
62         /* Cast to structure, containing bonded device's port id and queue id */
63         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
64
65         internals = bd_rx_q->dev_private;
66
67         switch (internals->mode) {
68         case BONDING_MODE_ROUND_ROBIN:
69         case BONDING_MODE_BROADCAST:
70         case BONDING_MODE_BALANCE:
71                 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
72                         /* Offset of pointer to *bufs increases as packets are received
73                          * from other slaves */
74                         num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
75                                         bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
76                         if (num_rx_slave) {
77                                 num_rx_total += num_rx_slave;
78                                 nb_pkts -= num_rx_slave;
79                         }
80                 }
81                 break;
82         case BONDING_MODE_ACTIVE_BACKUP:
83                 num_rx_slave = rte_eth_rx_burst(internals->current_primary_port,
84                                 bd_rx_q->queue_id, bufs, nb_pkts);
85                 if (num_rx_slave)
86                         num_rx_total = num_rx_slave;
87                 break;
88         }
89         return num_rx_total;
90 }
91
92 static uint16_t
93 bond_ethdev_tx_round_robin(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *dev_private;
97         struct bond_tx_queue *bd_tx_q;
98
99         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
100         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
101
102         uint8_t num_of_slaves;
103         uint8_t slaves[RTE_MAX_ETHPORTS];
104
105         uint16_t num_tx_total = 0;
106
107         static int slave_idx = 0;
108         int i, cs_idx = 0;
109
110         bd_tx_q = (struct bond_tx_queue *)queue;
111         dev_private = bd_tx_q->dev_private;
112
113         /* Copy slave list to protect against slave up/down changes during tx
114          * bursting */
115         num_of_slaves = dev_private->active_slave_count;
116         memcpy(slaves, dev_private->active_slaves,
117                         sizeof(dev_private->active_slaves[0]) * num_of_slaves);
118
119         if (num_of_slaves < 1)
120                 return num_tx_total;
121
122         /* Populate slaves mbuf with which packets are to be sent on it  */
123         for (i = 0; i < nb_pkts; i++) {
124                 cs_idx = (slave_idx + i) % num_of_slaves;
125                 slave_bufs[cs_idx][(slave_nb_pkts[cs_idx])++] = bufs[i];
126         }
127
128         /* increment current slave index so the next call to tx burst starts on the
129          * next slave */
130         slave_idx = ++cs_idx;
131
132         /* Send packet burst on each slave device */
133         for (i = 0; i < num_of_slaves; i++)
134                 if (slave_nb_pkts[i] > 0)
135                         num_tx_total += rte_eth_tx_burst(slaves[i],
136                                         bd_tx_q->queue_id, slave_bufs[i], slave_nb_pkts[i]);
137
138         return num_tx_total;
139 }
140
141 static uint16_t
142 bond_ethdev_tx_active_backup(void *queue,
143                 struct rte_mbuf **bufs, uint16_t nb_pkts)
144 {
145         struct bond_dev_private *internals;
146         struct bond_tx_queue *bd_tx_q;
147
148         bd_tx_q = (struct bond_tx_queue *)queue;
149         internals = bd_tx_q->dev_private;
150
151         if (internals->active_slave_count < 1)
152                 return 0;
153
154         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
155                         bufs, nb_pkts);
156 }
157
158 static inline uint16_t
159 ether_hash(struct ether_hdr *eth_hdr)
160 {
161         uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes;
162         uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes;
163
164         return (word_src_addr[0] ^ word_dst_addr[0]) ^
165                         (word_src_addr[1] ^ word_dst_addr[1]) ^
166                         (word_src_addr[2] ^ word_dst_addr[2]);
167 }
168
169 static inline uint32_t
170 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
171 {
172         return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
173 }
174
175 static inline uint32_t
176 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
177 {
178         uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]);
179         uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
180
181         return (word_src_addr[0] ^ word_dst_addr[0]) ^
182                         (word_src_addr[1] ^ word_dst_addr[1]) ^
183                         (word_src_addr[2] ^ word_dst_addr[2]) ^
184                         (word_src_addr[3] ^ word_dst_addr[3]);
185 }
186
187 static uint32_t
188 udp_hash(struct udp_hdr *hdr)
189 {
190         return hdr->src_port ^ hdr->dst_port;
191 }
192
193 static inline uint16_t
194 xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
195 {
196         struct ether_hdr *eth_hdr;
197         struct udp_hdr *udp_hdr;
198         size_t eth_offset = 0;
199         uint32_t hash = 0;
200
201         if (slave_count == 1)
202                 return 0;
203
204         switch (policy) {
205         case BALANCE_XMIT_POLICY_LAYER2:
206                 eth_hdr = (struct ether_hdr *)buf->pkt.data;
207
208                 hash = ether_hash(eth_hdr);
209                 hash ^= hash >> 8;
210                 return hash % slave_count;
211
212         case BALANCE_XMIT_POLICY_LAYER23:
213                 eth_hdr = (struct ether_hdr *)buf->pkt.data;
214
215                 if (buf->ol_flags & PKT_RX_VLAN_PKT)
216                         eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
217                 else
218                         eth_offset = sizeof(struct ether_hdr);
219
220                 if (buf->ol_flags & PKT_RX_IPV4_HDR) {
221                         struct ipv4_hdr *ipv4_hdr;
222                         ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(buf,
223                                         unsigned char *) + eth_offset);
224
225                         hash = ether_hash(eth_hdr) ^ ipv4_hash(ipv4_hdr);
226
227                 } else {
228                         struct ipv6_hdr *ipv6_hdr;
229
230                         ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(buf,
231                                         unsigned char *) + eth_offset);
232
233                         hash = ether_hash(eth_hdr) ^ ipv6_hash(ipv6_hdr);
234                 }
235                 break;
236
237         case BALANCE_XMIT_POLICY_LAYER34:
238                 if (buf->ol_flags & PKT_RX_VLAN_PKT)
239                         eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
240                 else
241                         eth_offset = sizeof(struct ether_hdr);
242
243                 if (buf->ol_flags & PKT_RX_IPV4_HDR) {
244                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
245                                         (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);
246
247                         if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
248                                 udp_hdr = (struct udp_hdr *)
249                                                 (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
250                                                                 sizeof(struct ipv4_hdr));
251                                 hash = ipv4_hash(ipv4_hdr) ^ udp_hash(udp_hdr);
252                         } else {
253                                 hash = ipv4_hash(ipv4_hdr);
254                         }
255                 } else {
256                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
257                                         (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);
258
259                         if (ipv6_hdr->proto == IPPROTO_UDP) {
260                                 udp_hdr = (struct udp_hdr *)
261                                                 (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
262                                                                 sizeof(struct ipv6_hdr));
263                                 hash = ipv6_hash(ipv6_hdr) ^ udp_hash(udp_hdr);
264                         } else {
265                                 hash = ipv6_hash(ipv6_hdr);
266                         }
267                 }
268                 break;
269         }
270
271         hash ^= hash >> 16;
272         hash ^= hash >> 8;
273
274         return hash % slave_count;
275 }
276
277 static uint16_t
278 bond_ethdev_tx_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
279 {
280         struct bond_dev_private *internals;
281         struct bond_tx_queue *bd_tx_q;
282
283         uint8_t num_of_slaves;
284         uint8_t slaves[RTE_MAX_ETHPORTS];
285
286         uint16_t num_tx_total = 0;
287
288         int i, op_slave_id;
289
290         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
291         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
292
293         bd_tx_q = (struct bond_tx_queue *)queue;
294         internals = bd_tx_q->dev_private;
295
296         /* Copy slave list to protect against slave up/down changes during tx
297          * bursting */
298         num_of_slaves = internals->active_slave_count;
299         memcpy(slaves, internals->active_slaves,
300                         sizeof(internals->active_slaves[0]) * num_of_slaves);
301
302         if (num_of_slaves < 1)
303                 return num_tx_total;
304
305         /* Populate slaves mbuf with the packets which are to be sent on it  */
306         for (i = 0; i < nb_pkts; i++) {
307                 /* Select output slave using hash based on xmit policy */
308                 op_slave_id = xmit_slave_hash(bufs[i], num_of_slaves,
309                                 internals->balance_xmit_policy);
310
311                 /* Populate slave mbuf arrays with mbufs for that slave */
312                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
313         }
314
315         /* Send packet burst on each slave device */
316         for (i = 0; i < num_of_slaves; i++) {
317                 if (slave_nb_pkts[i] > 0) {
318                         num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
319                                         slave_bufs[i], slave_nb_pkts[i]);
320                 }
321         }
322
323         return num_tx_total;
324 }
325
326 static uint16_t
327 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
328                 uint16_t nb_pkts)
329 {
330         struct bond_dev_private *internals;
331         struct bond_tx_queue *bd_tx_q;
332
333         uint8_t num_of_slaves;
334         uint8_t slaves[RTE_MAX_ETHPORTS];
335
336         uint16_t num_tx_total = 0;
337
338         int i;
339
340         bd_tx_q = (struct bond_tx_queue *)queue;
341         internals = bd_tx_q->dev_private;
342
343         /* Copy slave list to protect against slave up/down changes during tx
344          * bursting */
345         num_of_slaves = internals->active_slave_count;
346         memcpy(slaves, internals->active_slaves,
347                         sizeof(internals->active_slaves[0]) * num_of_slaves);
348
349         if (num_of_slaves < 1)
350                 return 0;
351
352         /* Increment reference count on mbufs */
353         for (i = 0; i < nb_pkts; i++)
354                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
355
356         /* Transmit burst on each active slave */
357         for (i = 0; i < num_of_slaves; i++)
358                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
359                                 bufs, nb_pkts);
360
361         return num_tx_total;
362 }
363
364 void
365 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
366                 struct rte_eth_link *slave_dev_link)
367 {
368         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
369         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
370
371         if (slave_dev_link->link_status &&
372                 bonded_eth_dev->data->dev_started) {
373                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
374                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
375
376                 internals->link_props_set = 1;
377         }
378 }
379
380 void
381 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
382 {
383         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
384
385         memset(&(bonded_eth_dev->data->dev_link), 0,
386                         sizeof(bonded_eth_dev->data->dev_link));
387
388         internals->link_props_set = 0;
389 }
390
391 int
392 link_properties_valid(struct rte_eth_link *bonded_dev_link,
393                 struct rte_eth_link *slave_dev_link)
394 {
395         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
396                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
397                 return -1;
398
399         return 0;
400 }
401
402 int
403 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
404 {
405         struct ether_addr *mac_addr;
406
407         mac_addr = eth_dev->data->mac_addrs;
408
409         if (eth_dev == NULL) {
410                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
411                 return -1;
412         }
413
414         if (new_mac_addr == NULL) {
415                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
416                 return -1;
417         }
418
419         /* if new MAC is different to current MAC then update */
420         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
421                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
422
423         return 0;
424 }
425
426 int
427 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
428 {
429         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
430         int i;
431
432         /* Update slave devices MAC addresses */
433         if (internals->slave_count < 1)
434                 return -1;
435
436         switch (internals->mode) {
437         case BONDING_MODE_ROUND_ROBIN:
438         case BONDING_MODE_BALANCE:
439         case BONDING_MODE_BROADCAST:
440                 for (i = 0; i < internals->slave_count; i++) {
441                         if (mac_address_set(&rte_eth_devices[internals->slaves[i]],
442                                         bonded_eth_dev->data->mac_addrs)) {
443                                 RTE_LOG(ERR, PMD,
444                                                 "%s: Failed to update port Id %d MAC address\n",
445                                                 __func__, internals->slaves[i]);
446                                 return -1;
447                         }
448                 }
449                 break;
450         case BONDING_MODE_ACTIVE_BACKUP:
451         default:
452                 for (i = 0; i < internals->slave_count; i++) {
453                         if (internals->slaves[i] == internals->current_primary_port) {
454                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
455                                                 bonded_eth_dev->data->mac_addrs)) {
456                                         RTE_LOG(ERR, PMD,
457                                                         "%s: Failed to update port Id %d MAC address\n",
458                                                         __func__, internals->current_primary_port);
459                                 }
460                         } else {
461                                 struct slave_conf *conf =
462                                                 slave_config_get(internals, internals->slaves[i]);
463
464                                 if (mac_address_set(&rte_eth_devices[internals->slaves[i]],
465                                                 &conf->mac_addr)) {
466                                         RTE_LOG(ERR, PMD,
467                                                         "%s: Failed to update port Id %d MAC address\n",
468                                                         __func__, internals->slaves[i]);
469
470                                         return -1;
471                                 }
472                         }
473                 }
474         }
475
476         return 0;
477 }
478
479 int
480 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
481 {
482         struct bond_dev_private *internals;
483
484         internals = eth_dev->data->dev_private;
485
486         switch (mode) {
487         case BONDING_MODE_ROUND_ROBIN:
488                 eth_dev->tx_pkt_burst = bond_ethdev_tx_round_robin;
489                 break;
490         case BONDING_MODE_ACTIVE_BACKUP:
491                 eth_dev->tx_pkt_burst = bond_ethdev_tx_active_backup;
492                 break;
493         case BONDING_MODE_BALANCE:
494                 eth_dev->tx_pkt_burst = bond_ethdev_tx_balance;
495                 break;
496         case BONDING_MODE_BROADCAST:
497                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
498                 break;
499         default:
500                 return -1;
501         }
502
503         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
504         internals->mode = mode;
505
506         return 0;
507 }
508
509 int
510 slave_configure(struct rte_eth_dev *bonded_eth_dev,
511                 struct rte_eth_dev *slave_eth_dev)
512 {
513         struct bond_rx_queue *bd_rx_q;
514         struct bond_tx_queue *bd_tx_q;
515
516         int q_id;
517
518         /* Stop slave */
519         rte_eth_dev_stop(slave_eth_dev->data->port_id);
520
521         /* Enable interrupts on slave device */
522         slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
523
524         if (rte_eth_dev_configure(slave_eth_dev->data->port_id,
525                         bonded_eth_dev->data->nb_rx_queues,
526                         bonded_eth_dev->data->nb_tx_queues,
527                         &(slave_eth_dev->data->dev_conf)) != 0) {
528                 RTE_LOG(ERR, PMD, "Cannot configure slave device: port=%u\n",
529                                 slave_eth_dev->data->port_id);
530                 return -1;
531         }
532
533         /* Setup Rx Queues */
534         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
535                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
536
537                 if (rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
538                                 bd_rx_q->nb_rx_desc,
539                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
540                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool) != 0) {
541                         RTE_LOG(ERR, PMD, "rte_eth_rx_queue_setup: port=%d queue_id %d\n",
542                                         slave_eth_dev->data->port_id, q_id);
543                         return -1;
544                 }
545         }
546
547         /* Setup Tx Queues */
548         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
549                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
550
551                 if (rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
552                                 bd_tx_q->nb_tx_desc,
553                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
554                                 &bd_tx_q->tx_conf) != 0) {
555                         RTE_LOG(ERR, PMD, "rte_eth_tx_queue_setup: port=%d queue_id %d\n",
556                                         slave_eth_dev->data->port_id, q_id);
557                         return -1;
558                 }
559         }
560
561         /* Start device */
562         if (rte_eth_dev_start(slave_eth_dev->data->port_id) != 0) {
563                 RTE_LOG(ERR, PMD, "rte_eth_dev_start: port=%u\n",
564                                 slave_eth_dev->data->port_id);
565                 return -1;
566         }
567
568         return 0;
569 }
570
571 struct slave_conf *
572 slave_config_get(struct bond_dev_private *internals, uint8_t slave_port_id)
573 {
574         int i;
575
576         for (i = 0; i < internals->slave_count; i++) {
577                 if (internals->presisted_slaves_conf[i].port_id == slave_port_id)
578                         return &internals->presisted_slaves_conf[i];
579         }
580         return NULL;
581 }
582
583 void
584 slave_config_clear(struct bond_dev_private *internals,
585                 struct rte_eth_dev *slave_eth_dev)
586 {
587         int i, found = 0;
588
589         for (i = 0; i < internals->slave_count; i++) {
590                 if (internals->presisted_slaves_conf[i].port_id ==
591                                 slave_eth_dev->data->port_id) {
592                         found = 1;
593                         memset(&internals->presisted_slaves_conf[i], 0,
594                                         sizeof(internals->presisted_slaves_conf[i]));
595                 }
596                 if (found && i < (internals->slave_count - 1)) {
597                         memcpy(&internals->presisted_slaves_conf[i],
598                                         &internals->presisted_slaves_conf[i+1],
599                                         sizeof(internals->presisted_slaves_conf[i]));
600                 }
601         }
602 }
603
604 void
605 slave_config_store(struct bond_dev_private *internals,
606                 struct rte_eth_dev *slave_eth_dev)
607 {
608         struct slave_conf *presisted_slave_conf =
609                         &internals->presisted_slaves_conf[internals->slave_count];
610
611         presisted_slave_conf->port_id = slave_eth_dev->data->port_id;
612
613         memcpy(&(presisted_slave_conf->mac_addr), slave_eth_dev->data->mac_addrs,
614                         sizeof(struct ether_addr));
615 }
616
617 void
618 bond_ethdev_primary_set(struct bond_dev_private *internals,
619                 uint8_t slave_port_id)
620 {
621         int i;
622
623         if (internals->active_slave_count < 1)
624                 internals->current_primary_port = slave_port_id;
625         else
626                 /* Search bonded device slave ports for new proposed primary port */
627                 for (i = 0; i < internals->active_slave_count; i++) {
628                         if (internals->active_slaves[i] == slave_port_id)
629                                 internals->current_primary_port = slave_port_id;
630                 }
631 }
632
633 static void
634 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
635
636 static int
637 bond_ethdev_start(struct rte_eth_dev *eth_dev)
638 {
639         struct bond_dev_private *internals;
640         int i;
641
642         /* slave eth dev will be started by bonded device */
643         if (valid_bonded_ethdev(eth_dev)) {
644                 RTE_LOG(ERR, PMD,
645                                 "%s: user tried to explicitly start a slave eth_dev (%d) of the bonded eth_dev\n",
646                                 __func__, eth_dev->data->port_id);
647                 return -1;
648         }
649
650         eth_dev->data->dev_link.link_status = 1;
651         eth_dev->data->dev_started = 1;
652
653         internals = eth_dev->data->dev_private;
654
655         if (internals->slave_count == 0) {
656                 RTE_LOG(ERR, PMD,
657                                 "%s: Cannot start port since there are no slave devices\n",
658                                 __func__);
659                 return -1;
660         }
661
662         if (internals->user_defined_mac == 0) {
663                 struct slave_conf *conf = slave_config_get(internals,
664                                 internals->primary_port);
665
666                 if (mac_address_set(eth_dev, &(conf->mac_addr)) != 0) {
667                         RTE_LOG(ERR, PMD,
668                                         "bonded port (%d) failed to update mac address",
669                                         eth_dev->data->port_id);
670                         return -1;
671                 }
672         }
673
674         /* Update all slave devices MACs*/
675         if (mac_address_slaves_update(eth_dev) != 0)
676                 return -1;
677
678         /* If bonded device is configure in promiscuous mode then re-apply config */
679         if (internals->promiscuous_en)
680                 bond_ethdev_promiscuous_enable(eth_dev);
681
682         /* Reconfigure each slave device if starting bonded device */
683         for (i = 0; i < internals->slave_count; i++) {
684                 if (slave_configure(eth_dev, &(rte_eth_devices[internals->slaves[i]]))
685                                 != 0) {
686                         RTE_LOG(ERR, PMD,
687                                         "bonded port (%d) failed to reconfigure slave device %d)",
688                                         eth_dev->data->port_id, internals->slaves[i]);
689                         return -1;
690                 }
691         }
692
693         if (internals->user_defined_primary_port)
694                 bond_ethdev_primary_set(internals, internals->primary_port);
695
696         return 0;
697 }
698
699 static void
700 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
701 {
702         struct bond_dev_private *internals = eth_dev->data->dev_private;
703
704         internals->active_slave_count = 0;
705
706         eth_dev->data->dev_link.link_status = 0;
707         eth_dev->data->dev_started = 0;
708 }
709
710 static void
711 bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
712 {
713 }
714
715 static int
716 bond_ethdev_configure(struct rte_eth_dev *dev __rte_unused)
717 {
718         return 0;
719 }
720
721 static void
722 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
723 {
724         dev_info->driver_name = driver_name;
725         dev_info->max_mac_addrs = 1;
726
727         dev_info->max_rx_pktlen = (uint32_t)2048;
728
729         dev_info->max_rx_queues = (uint16_t)128;
730         dev_info->max_tx_queues = (uint16_t)512;
731
732         dev_info->min_rx_bufsize = 0;
733         dev_info->pci_dev = dev->pci_dev;
734 }
735
736 static int
737 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
738                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
739                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
740 {
741         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
742                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
743                                         0, dev->pci_dev->numa_node);
744         if (bd_rx_q == NULL)
745                 return -1;
746
747         bd_rx_q->queue_id = rx_queue_id;
748         bd_rx_q->dev_private = dev->data->dev_private;
749
750         bd_rx_q->nb_rx_desc = nb_rx_desc;
751
752         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
753         bd_rx_q->mb_pool = mb_pool;
754
755         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
756
757         return 0;
758 }
759
760 static int
761 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
762                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
763                 const struct rte_eth_txconf *tx_conf)
764 {
765         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
766                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
767                                         0, dev->pci_dev->numa_node);
768
769         if (bd_tx_q == NULL)
770                         return -1;
771
772         bd_tx_q->queue_id = tx_queue_id;
773         bd_tx_q->dev_private = dev->data->dev_private;
774
775         bd_tx_q->nb_tx_desc = nb_tx_desc;
776         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
777
778         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
779
780         return 0;
781 }
782
/* Rx queue release callback: frees the queue structure with rte_free();
 * a NULL queue is ignored. */
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue != NULL)
		rte_free(queue);
}
791
/* Tx queue release callback: frees the queue structure with rte_free();
 * a NULL queue is ignored. */
static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue != NULL)
		rte_free(queue);
}
800
801 static int
802 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
803                 int wait_to_complete)
804 {
805         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
806
807         if (!bonded_eth_dev->data->dev_started ||
808                 internals->active_slave_count == 0) {
809                 bonded_eth_dev->data->dev_link.link_status = 0;
810                 return 0;
811         } else {
812                 struct rte_eth_dev *slave_eth_dev;
813                 int i, link_up = 0;
814
815                 for (i = 0; i < internals->active_slave_count; i++) {
816                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
817
818                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
819                                         wait_to_complete);
820                         if (slave_eth_dev->data->dev_link.link_status == 1) {
821                                 link_up = 1;
822                                 break;
823                         }
824                 }
825
826                 bonded_eth_dev->data->dev_link.link_status = link_up;
827         }
828
829         return 0;
830 }
831
832 static void
833 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
834 {
835         struct bond_dev_private *internals = dev->data->dev_private;
836         struct rte_eth_stats slave_stats;
837
838         int i;
839
840         /* clear bonded stats before populating from slaves */
841         memset(stats, 0, sizeof(*stats));
842
843         for (i = 0; i < internals->slave_count; i++) {
844                 rte_eth_stats_get(internals->slaves[i], &slave_stats);
845
846                 stats->ipackets += slave_stats.ipackets;
847                 stats->opackets += slave_stats.opackets;
848                 stats->ibytes += slave_stats.ibytes;
849                 stats->obytes += slave_stats.obytes;
850                 stats->ierrors += slave_stats.ierrors;
851                 stats->oerrors += slave_stats.oerrors;
852                 stats->imcasts += slave_stats.imcasts;
853                 stats->rx_nombuf += slave_stats.rx_nombuf;
854                 stats->fdirmatch += slave_stats.fdirmatch;
855                 stats->fdirmiss += slave_stats.fdirmiss;
856                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
857                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
858                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
859                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
860         }
861 }
862
863 static void
864 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
865 {
866         struct bond_dev_private *internals = dev->data->dev_private;
867         int i;
868
869         for (i = 0; i < internals->slave_count; i++)
870                 rte_eth_stats_reset(internals->slaves[i]);
871 }
872
873 static void
874 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
875 {
876         struct bond_dev_private *internals = eth_dev->data->dev_private;
877         int i;
878
879         internals->promiscuous_en = 1;
880
881         switch (internals->mode) {
882         /* Promiscuous mode is propagated to all slaves */
883         case BONDING_MODE_ROUND_ROBIN:
884         case BONDING_MODE_BALANCE:
885         case BONDING_MODE_BROADCAST:
886                 for (i = 0; i < internals->slave_count; i++)
887                         rte_eth_promiscuous_enable(internals->slaves[i]);
888                 break;
889         /* Promiscuous mode is propagated only to primary slave */
890         case BONDING_MODE_ACTIVE_BACKUP:
891         default:
892                 rte_eth_promiscuous_enable(internals->current_primary_port);
893
894         }
895 }
896
897 static void
898 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
899 {
900         struct bond_dev_private *internals = dev->data->dev_private;
901         int i;
902
903         internals->promiscuous_en = 0;
904
905         switch (internals->mode) {
906         /* Promiscuous mode is propagated to all slaves */
907         case BONDING_MODE_ROUND_ROBIN:
908         case BONDING_MODE_BALANCE:
909         case BONDING_MODE_BROADCAST:
910                 for (i = 0; i < internals->slave_count; i++)
911                         rte_eth_promiscuous_disable(internals->slaves[i]);
912                 break;
913         /* Promiscuous mode is propagated only to primary slave */
914         case BONDING_MODE_ACTIVE_BACKUP:
915         default:
916                 rte_eth_promiscuous_disable(internals->current_primary_port);
917         }
918 }
919
920 void
921 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
922                 void *param)
923 {
924         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
925         struct bond_dev_private *internals;
926         struct rte_eth_link link;
927
928         int i, bonded_port_id, valid_slave, active_pos = -1;
929
930         if (type != RTE_ETH_EVENT_INTR_LSC)
931                 return;
932
933         if (param == NULL)
934                 return;
935
936         bonded_port_id = *(uint8_t *)param;
937
938         bonded_eth_dev = &rte_eth_devices[bonded_port_id];
939         slave_eth_dev = &rte_eth_devices[port_id];
940
941         if (valid_bonded_ethdev(bonded_eth_dev))
942                 return;
943
944         internals = bonded_eth_dev->data->dev_private;
945
946         /* If the device isn't started don't handle interrupts */
947         if (!bonded_eth_dev->data->dev_started)
948                 return;
949
950         /* verify that port_id is a valid slave of bonded port */
951         for (i = 0; i < internals->slave_count; i++) {
952                 if (internals->slaves[i] == port_id) {
953                         valid_slave = 1;
954                         break;
955                 }
956         }
957
958         if (!valid_slave)
959                 return;
960
961         /* Search for port in active port list */
962         for (i = 0; i < internals->active_slave_count; i++) {
963                 if (port_id == internals->active_slaves[i]) {
964                         active_pos = i;
965                         break;
966                 }
967         }
968
969         rte_eth_link_get_nowait(port_id, &link);
970         if (link.link_status) {
971                 if (active_pos == -1) {
972                         /* if no active slave ports then set this port to be primary port */
973                         if (internals->active_slave_count == 0) {
974                                 /* If first active slave, then change link status */
975                                 bonded_eth_dev->data->dev_link.link_status = 1;
976                                 internals->current_primary_port = port_id;
977
978                                 /* Inherit eth dev link properties from first active slave */
979                                 link_properties_set(bonded_eth_dev,
980                                                 &(slave_eth_dev->data->dev_link));
981
982                         }
983                         internals->active_slaves[internals->active_slave_count++] = port_id;
984
985                         /* If user has defined the primary port then default to using it */
986                         if (internals->user_defined_primary_port &&
987                                         internals->primary_port == port_id)
988                                 bond_ethdev_primary_set(internals, port_id);
989
990                 }
991         } else {
992                 if (active_pos != -1) {
993                         /* Remove from active slave list */
994                         for (i = active_pos; i < (internals->active_slave_count - 1); i++)
995                                 internals->active_slaves[i] = internals->active_slaves[i+1];
996
997                         internals->active_slave_count--;
998
999                         /* No active slaves, change link status to down and reset other
1000                          * link properties */
1001                         if (internals->active_slave_count == 0)
1002                                 link_properties_reset(bonded_eth_dev);
1003
1004                         /* Update primary id, take first active slave from list or if none
1005                          * available set to -1 */
1006                         if (port_id == internals->current_primary_port) {
1007                                 if (internals->active_slave_count > 0)
1008                                         bond_ethdev_primary_set(internals,
1009                                                         internals->active_slaves[0]);
1010                                 else
1011                                         internals->current_primary_port = internals->primary_port;
1012                         }
1013                 }
1014         }
1015 }
1016
1017 struct eth_dev_ops default_dev_ops = {
1018                 .dev_start = bond_ethdev_start,
1019                 .dev_stop = bond_ethdev_stop,
1020                 .dev_close = bond_ethdev_close,
1021                 .dev_configure = bond_ethdev_configure,
1022                 .dev_infos_get = bond_ethdev_info,
1023                 .rx_queue_setup = bond_ethdev_rx_queue_setup,
1024                 .tx_queue_setup = bond_ethdev_tx_queue_setup,
1025                 .rx_queue_release = bond_ethdev_rx_queue_release,
1026                 .tx_queue_release = bond_ethdev_tx_queue_release,
1027                 .link_update = bond_ethdev_link_update,
1028                 .stats_get = bond_ethdev_stats_get,
1029                 .stats_reset = bond_ethdev_stats_reset,
1030                 .promiscuous_enable = bond_ethdev_promiscuous_enable,
1031                 .promiscuous_disable = bond_ethdev_promiscuous_disable
1032 };
1033
1034 static int
1035 bond_init(const char *name, const char *params)
1036 {
1037         struct rte_kvargs *kvlist;
1038         uint8_t bonding_mode, socket_id;
1039         int  arg_count, port_id;
1040
1041         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
1042
1043         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
1044         if (kvlist == NULL)
1045                 return -1;
1046
1047         /* Parse link bonding mode */
1048         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
1049                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
1050                                 &bond_ethdev_parse_slave_mode_kvarg, &bonding_mode) != 0) {
1051                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n", name);
1052                         return -1;
1053                 }
1054         } else {
1055                 RTE_LOG(ERR, EAL,
1056                                 "Mode must be specified only once for bonded device %s\n",
1057                                 name);
1058                 return -1;
1059         }
1060
1061         /* Parse socket id to create bonding device on */
1062         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
1063         if (arg_count == 1) {
1064                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
1065                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id) != 0) {
1066                         RTE_LOG(ERR, EAL,
1067                                         "Invalid socket Id specified for bonded device %s\n",
1068                                         name);
1069                         return -1;
1070                 }
1071         } else if (arg_count > 1) {
1072                 RTE_LOG(ERR, EAL,
1073                                 "Socket Id can be specified only once for bonded device %s\n",
1074                                 name);
1075                 return -1;
1076         } else {
1077                 socket_id = rte_socket_id();
1078         }
1079
1080         /* Create link bonding eth device */
1081         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
1082         if (port_id < 0) {
1083                 RTE_LOG(ERR, EAL,
1084                                 "Failed to create socket %s in mode %u on socket %u.\n",
1085                                 name, bonding_mode, socket_id);
1086                 return -1;
1087         }
1088
1089         RTE_LOG(INFO, EAL,
1090                         "Create bonded device %s on port %d in mode %u on socket %u.\n",
1091                         name, port_id, bonding_mode, socket_id);
1092
1093         /* Parse MAC address for bonded device */
1094         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
1095         if (arg_count == 1) {
1096                 struct ether_addr bond_mac;
1097
1098                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
1099                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
1100                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
1101                                         name);
1102                         return -1;
1103                 }
1104
1105                 /* Set MAC address */
1106                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
1107                         RTE_LOG(ERR, EAL,
1108                                         "Failed to set mac address on bonded device %s\n",
1109                                         name);
1110                         return -1;
1111                 }
1112         } else if (arg_count > 1) {
1113                 RTE_LOG(ERR, EAL,
1114                                 "MAC address can be specified only once for bonded device %s\n",
1115                                 name);
1116                 return -1;
1117         }
1118
1119         /* Parse/set balance mode transmit policy */
1120         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
1121         if (arg_count == 1) {
1122                 uint8_t xmit_policy;
1123
1124                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
1125                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
1126                                                 0) {
1127                         RTE_LOG(INFO, EAL,
1128                                         "Invalid xmit policy specified for bonded device %s\n",
1129                                         name);
1130                         return -1;
1131                 }
1132
1133                 /* Set balance mode transmit policy*/
1134                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
1135                         RTE_LOG(ERR, EAL,
1136                                         "Failed to set balance xmit policy on bonded device %s\n",
1137                                         name);
1138                         return -1;
1139                 }
1140         } else if (arg_count > 1) {
1141                 RTE_LOG(ERR, EAL,
1142                                 "Transmit policy can be specified only once for bonded device %s\n",
1143                                 name);
1144                 return -1;
1145         }
1146
1147         /* Parse/add slave ports to bonded device */
1148         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
1149                 struct bond_ethdev_slave_ports slave_ports;
1150                 unsigned i;
1151
1152                 memset(&slave_ports, 0, sizeof(slave_ports));
1153
1154                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
1155                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
1156                         RTE_LOG(ERR, EAL,
1157                                         "Failed to parse slave ports for bonded device %s\n",
1158                                         name);
1159                         return -1;
1160                 }
1161
1162                 for (i = 0; i < slave_ports.slave_count; i++) {
1163                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
1164                                 RTE_LOG(ERR, EAL,
1165                                                 "Failed to add port %d as slave to bonded device %s\n",
1166                                                 slave_ports.slaves[i], name);
1167                         }
1168                 }
1169
1170         } else {
1171                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
1172                 return -1;
1173         }
1174
1175         /* Parse/set primary slave port id*/
1176         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
1177         if (arg_count == 1) {
1178                 uint8_t primary_slave_port_id;
1179
1180                 if (rte_kvargs_process(kvlist,
1181                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
1182                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
1183                                 &primary_slave_port_id) < 0) {
1184                         RTE_LOG(INFO, EAL,
1185                                         "Invalid primary slave port id specified for bonded device %s\n",
1186                                         name);
1187                         return -1;
1188                 }
1189
1190                 /* Set balance mode transmit policy*/
1191                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
1192                                 != 0) {
1193                         RTE_LOG(ERR, EAL,
1194                                         "Failed to set primary slave port %d on bonded device %s\n",
1195                                         primary_slave_port_id, name);
1196                         return -1;
1197                 }
1198         } else if (arg_count > 1) {
1199                 RTE_LOG(INFO, EAL,
1200                                 "Primary slave can be specified only once for bonded device %s\n",
1201                                 name);
1202                 return -1;
1203         }
1204
1205         return 0;
1206 }
1207
1208 static struct rte_driver bond_drv = {
1209         .name = PMD_BOND_NAME,
1210         .type = PMD_BDEV,
1211         .init = bond_init,
1212 };
1213
1214 PMD_REGISTER_DRIVER(bond_drv);