bond: add mode 6
[dpdk.git] / lib / librte_pmd_bond / rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 #include <rte_ip.h>
42 #include <rte_devargs.h>
43 #include <rte_kvargs.h>
44 #include <rte_dev.h>
45 #include <rte_alarm.h>
46 #include <rte_cycles.h>
47
48 #include "rte_eth_bond.h"
49 #include "rte_eth_bond_private.h"
50 #include "rte_eth_bond_8023ad_private.h"
51
52 #define REORDER_PERIOD_MS 10
53
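/* XOR of the L4 source and destination ports; valid for both TCP and UDP
 * headers, as the port fields occupy the same leading offsets in each. */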
54 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
55
56 /* Per-port snapshot of TX byte counters used by mode 5 (TLB) statistics */
57 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
58
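/* Return the combined size of any VLAN headers (single or QinQ tagged)
 * following the Ethernet header (0, 4 or 8 bytes), updating *proto to the
 * encapsulated EtherType (big-endian) as each tag is skipped. */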
59 static inline size_t
60 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
61 {
62         size_t vlan_offset = 0;
63
64         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
65                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
66
67                 vlan_offset = sizeof(struct vlan_hdr);
68                 *proto = vlan_hdr->eth_proto;
69
70                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
71                         vlan_hdr = vlan_hdr + 1;
72                         *proto = vlan_hdr->eth_proto;
73                         vlan_offset += sizeof(struct vlan_hdr);
74                 }
75         }
76         return vlan_offset;
77 }
78
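/* RX burst used by the round-robin, balance and broadcast modes: polls each
 * active slave in turn, stopping once nb_pkts packets have been gathered or
 * every active slave has been polled once. */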
79 static uint16_t
80 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
81 {
82         struct bond_dev_private *internals;
83
84         uint16_t num_rx_slave = 0;
85         uint16_t num_rx_total = 0;
86
87         int i;
88
89         /* Cast to the structure containing the bonded device's port id and queue id */
90         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
91
92         internals = bd_rx_q->dev_private;
93
94
95         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
96                 /* The offset into *bufs advances as packets are received
97                  * from successive slaves */
98                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
99                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
100                 if (num_rx_slave) {
101                         num_rx_total += num_rx_slave;
102                         nb_pkts -= num_rx_slave;
103                 }
104         }
105
106         return num_rx_total;
107 }
108
109 static uint16_t
110 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
111                 uint16_t nb_pkts)
112 {
113         struct bond_dev_private *internals;
114
115         /* Cast to structure, containing bonded device's port id and queue id */
116         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
117
118         internals = bd_rx_q->dev_private;
119
120         return rte_eth_rx_burst(internals->current_primary_port,
121                         bd_rx_q->queue_id, bufs, nb_pkts);
122 }
123
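/* RX burst for 802.3ad (mode 4): data packets are passed up only from slaves
 * in COLLECTING state, slow protocol (LACP) frames are diverted to the mode 4
 * state machines, and unicast frames not addressed to the bond are dropped
 * unless promiscuous mode is enabled. */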
124 static uint16_t
125 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
126                 uint16_t nb_pkts)
127 {
128         /* Cast to the structure containing the bonded device's port id and queue id */
129         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
130         struct bond_dev_private *internals = bd_rx_q->dev_private;
131         struct ether_addr bond_mac;
132
133         struct ether_hdr *hdr;
134
135         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
136         uint16_t num_rx_total = 0;      /* Total number of received packets */
137         uint8_t slaves[RTE_MAX_ETHPORTS];
138         uint8_t slave_count;
139
140         uint8_t collecting;  /* current slave collecting status */
141         const uint8_t promisc = internals->promiscuous_en;
142         uint8_t i, j, k;
143
144         rte_eth_macaddr_get(internals->port_id, &bond_mac);
145         /* Copy slave list to protect against slave up/down changes during rx
146          * bursting */
147         slave_count = internals->active_slave_count;
148         memcpy(slaves, internals->active_slaves,
149                         sizeof(internals->active_slaves[0]) * slave_count);
150
151         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
152                 j = num_rx_total;
153                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
154
155                 /* Read packets from this slave */
156                 num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
157                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
158
159                 for (k = j; k < 2 && k < num_rx_total; k++)
160                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
161
162                 /* Handle slow protocol packets. */
163                 while (j < num_rx_total) {
164                         if (j + 3 < num_rx_total)
165                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
166
167                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
168                         /* Remove the packet from the array if it is a slow packet, the
169                          * slave is not in collecting state, or the bonding interface is
170                          * not in promiscuous mode and the destination MAC does not match. */
171                         if (unlikely(hdr->ether_type == ether_type_slow_be ||
172                                 !collecting || (!promisc &&
173                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
174
175                                 if (hdr->ether_type == ether_type_slow_be) {
176                                         bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
177                                                 bufs[j]);
178                                 } else
179                                         rte_pktmbuf_free(bufs[j]);
180
181                                 /* Packet is managed by mode 4 or dropped, shift the array */
182                                 num_rx_total--;
183                                 if (j < num_rx_total) {
184                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
185                                                 (num_rx_total - j));
186                                 }
187                         } else
188                                 j++;
189                 }
190         }
191
192         return num_rx_total;
193 }
194
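/* RX burst for adaptive load balancing (mode 6): receive as in mode 0, and
 * additionally snoop ARP packets to keep the ALB client table current. */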
195 static uint16_t
196 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
197 {
198         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
199         struct bond_dev_private *internals = bd_tx_q->dev_private;
200
201         struct ether_hdr *eth_h;
202
203         uint16_t ether_type, offset;
204         uint16_t nb_recv_pkts;
205
206         int i;
207
208         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
209
210         for (i = 0; i < nb_recv_pkts; i++) {
211                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
212                 ether_type = eth_h->ether_type;
213                 offset = get_vlan_offset(eth_h, &ether_type);
214
215                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
216                         bond_mode_alb_arp_recv(eth_h, offset, internals);
217                 }
218         }
219
220         return nb_recv_pkts;
221 }
222
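/* TX burst for round-robin (mode 0): distribute packets across the active
 * slaves in rotation, remembering (via a static index) where the previous
 * burst stopped so that successive bursts continue the rotation; packets a
 * slave fails to send are moved to the end of bufs. */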
223 static uint16_t
224 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
225                 uint16_t nb_pkts)
226 {
227         struct bond_dev_private *internals;
228         struct bond_tx_queue *bd_tx_q;
229
230         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
231         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
232
233         uint8_t num_of_slaves;
234         uint8_t slaves[RTE_MAX_ETHPORTS];
235
236         uint16_t num_tx_total = 0, num_tx_slave;
237
238         static int slave_idx = 0;
239         int i, cslave_idx = 0, tx_fail_total = 0;
240
241         bd_tx_q = (struct bond_tx_queue *)queue;
242         internals = bd_tx_q->dev_private;
243
244         /* Copy slave list to protect against slave up/down changes during tx
245          * bursting */
246         num_of_slaves = internals->active_slave_count;
247         memcpy(slaves, internals->active_slaves,
248                         sizeof(internals->active_slaves[0]) * num_of_slaves);
249
250         if (num_of_slaves < 1)
251                 return num_tx_total;
252
253         /* Populate each slave's mbuf array with the packets to be sent on it */
254         for (i = 0; i < nb_pkts; i++) {
255                 cslave_idx = (slave_idx + i) % num_of_slaves;
256                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
257         }
258
259         /* increment current slave index so the next call to tx burst starts on the
260          * next slave */
261         slave_idx = ++cslave_idx;
262
263         /* Send packet burst on each slave device */
264         for (i = 0; i < num_of_slaves; i++) {
265                 if (slave_nb_pkts[i] > 0) {
266                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
267                                         slave_bufs[i], slave_nb_pkts[i]);
268
269                         /* if tx burst fails, move unsent packets to the end of bufs */
270                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
271                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
272
273                                 tx_fail_total += tx_fail_slave;
274
275                                 memcpy(&bufs[nb_pkts - tx_fail_total],
276                                                 &slave_bufs[i][num_tx_slave],
277                                                 tx_fail_slave * sizeof(bufs[0]));
278                         }
279                         num_tx_total += num_tx_slave;
280                 }
281         }
282
283         return num_tx_total;
284 }
285
286 static uint16_t
287 bond_ethdev_tx_burst_active_backup(void *queue,
288                 struct rte_mbuf **bufs, uint16_t nb_pkts)
289 {
290         struct bond_dev_private *internals;
291         struct bond_tx_queue *bd_tx_q;
292
293         bd_tx_q = (struct bond_tx_queue *)queue;
294         internals = bd_tx_q->dev_private;
295
296         if (internals->active_slave_count < 1)
297                 return 0;
298
299         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
300                         bufs, nb_pkts);
301 }
302
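/* Fold the source and destination MAC addresses into a 16-bit value by
 * XORing them together word by word. */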
303 static inline uint16_t
304 ether_hash(struct ether_hdr *eth_hdr)
305 {
306         uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes;
307         uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes;
308
309         return (word_src_addr[0] ^ word_dst_addr[0]) ^
310                         (word_src_addr[1] ^ word_dst_addr[1]) ^
311                         (word_src_addr[2] ^ word_dst_addr[2]);
312 }
313
314 static inline uint32_t
315 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
316 {
317         return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
318 }
319
320 static inline uint32_t
321 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
322 {
323         uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]);
324         uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
325
326         return (word_src_addr[0] ^ word_dst_addr[0]) ^
327                         (word_src_addr[1] ^ word_dst_addr[1]) ^
328                         (word_src_addr[2] ^ word_dst_addr[2]) ^
329                         (word_src_addr[3] ^ word_dst_addr[3]);
330 }
331
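/* Transmit hash policies used by balance (mode 2) and 802.3ad (mode 4): map
 * a packet to a slave index from its L2 (MAC), L2+L3 (MAC/IP) or L3+L4
 * (IP/port) header fields, so packets of one flow always leave on the same
 * slave. */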
332 uint16_t
333 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
334 {
335         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
336
337         uint32_t hash = ether_hash(eth_hdr);
338
339         return (hash ^= hash >> 8) % slave_count;
340 }
341
342 uint16_t
343 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
344 {
345         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
346         uint16_t proto = eth_hdr->ether_type;
347         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
348         uint32_t hash, l3hash = 0;
349
350         hash = ether_hash(eth_hdr);
351
352         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
353                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
354                                 ((char *)(eth_hdr + 1) + vlan_offset);
355                 l3hash = ipv4_hash(ipv4_hdr);
356
357         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
358                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
359                                 ((char *)(eth_hdr + 1) + vlan_offset);
360                 l3hash = ipv6_hash(ipv6_hdr);
361         }
362
363         hash = hash ^ l3hash;
364         hash ^= hash >> 16;
365         hash ^= hash >> 8;
366
367         return hash % slave_count;
368 }
369
370 uint16_t
371 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
372 {
373         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
374         uint16_t proto = eth_hdr->ether_type;
375         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
376
377         struct udp_hdr *udp_hdr = NULL;
378         struct tcp_hdr *tcp_hdr = NULL;
379         uint32_t hash, l3hash = 0, l4hash = 0;
380
381         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
382                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
383                                 ((char *)(eth_hdr + 1) + vlan_offset);
384                 size_t ip_hdr_offset;
385
386                 l3hash = ipv4_hash(ipv4_hdr);
387
388                 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
389                                 IPV4_IHL_MULTIPLIER;
390
391                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
392                         tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
393                                         ip_hdr_offset);
394                         l4hash = HASH_L4_PORTS(tcp_hdr);
395                 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
396                         udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
397                                         ip_hdr_offset);
398                         l4hash = HASH_L4_PORTS(udp_hdr);
399                 }
400         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
401                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
402                                 ((char *)(eth_hdr + 1) + vlan_offset);
403                 l3hash = ipv6_hash(ipv6_hdr);
404
405                 if (ipv6_hdr->proto == IPPROTO_TCP) {
406                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
407                         l4hash = HASH_L4_PORTS(tcp_hdr);
408                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
409                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
410                         l4hash = HASH_L4_PORTS(udp_hdr);
411                 }
412         }
413
414         hash = l3hash ^ l4hash;
415         hash ^= hash >> 16;
416         hash ^= hash >> 8;
417
418         return hash % slave_count;
419 }
420
421 struct bwg_slave {
422         uint64_t bwg_left_int;
423         uint64_t bwg_left_remainder;
424         uint8_t slave;
425 };
426
427 void
428 bond_tlb_activate_slave(struct bond_dev_private *internals) {
429         int i;
430
431         for (i = 0; i < internals->active_slave_count; i++) {
432                 tlb_last_obytets[internals->active_slaves[i]] = 0;
433         }
434 }
435
436 static int
437 bandwidth_cmp(const void *a, const void *b)
438 {
439         const struct bwg_slave *bwg_a = a;
440         const struct bwg_slave *bwg_b = b;
441         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
442         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
443                         (int64_t)bwg_a->bwg_left_remainder;
444         if (diff > 0)
445                 return 1;
446         else if (diff < 0)
447                 return -1;
448         else if (diff2 > 0)
449                 return 1;
450         else if (diff2 < 0)
451                 return -1;
452         else
453                 return 0;
454 }
455
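/* Estimate the bandwidth still unused on a slave: convert the link speed
 * (Mbit/s) to bytes and compare it with the bytes transmitted since the last
 * statistics snapshot, keeping quotient and remainder separately so that
 * bandwidth_cmp() can order slaves without losing precision. */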
456 static void
457 bandwidth_left(int port_id, uint64_t load, uint8_t update_idx,
458                 struct bwg_slave *bwg_slave)
459 {
460         struct rte_eth_link link_status;
461
462         rte_eth_link_get(port_id, &link_status);
463         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
464         if (link_bwg == 0)
465                 return;
466         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
467         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
468         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
469 }
470
471 static void
472 bond_ethdev_update_tlb_slave_cb(void *arg)
473 {
474         struct bond_dev_private *internals = arg;
475         struct rte_eth_stats slave_stats;
476         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
477         uint8_t slave_count;
478         uint64_t tx_bytes;
479
480         uint8_t update_stats = 0;
481         uint8_t i, slave_id;
482
483         internals->slave_update_idx++;
484
485
486         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
487                 update_stats = 1;
488
489         for (i = 0; i < internals->active_slave_count; i++) {
490                 slave_id = internals->active_slaves[i];
491                 rte_eth_stats_get(slave_id, &slave_stats);
492                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
493                 bandwidth_left(slave_id, tx_bytes,
494                                 internals->slave_update_idx, &bwg_array[i]);
495                 bwg_array[i].slave = slave_id;
496
497                 if (update_stats) {
498                         tlb_last_obytets[slave_id] = slave_stats.obytes;
499                 }
500         }
501
502         if (update_stats == 1)
503                 internals->slave_update_idx = 0;
504
505         slave_count = i;
506         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
507         for (i = 0; i < slave_count; i++)
508                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
509
510         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
511                         (struct bond_dev_private *)internals);
512 }
513
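/* TX burst for transmit load balancing (mode 5): slaves are tried in the
 * order computed by the reorder callback (most spare bandwidth first), and
 * packets still carrying the primary slave's source MAC are rewritten to the
 * MAC of the slave actually transmitting them. */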
514 static uint16_t
515 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
516 {
517         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
518         struct bond_dev_private *internals = bd_tx_q->dev_private;
519
520         struct rte_eth_dev *primary_port =
521                         &rte_eth_devices[internals->primary_port];
522         uint16_t num_tx_total = 0;
523         uint8_t i, j;
524
525         uint8_t num_of_slaves = internals->active_slave_count;
526         uint8_t slaves[RTE_MAX_ETHPORTS];
527
528         struct ether_hdr *ether_hdr;
529         struct ether_addr primary_slave_addr;
530         struct ether_addr active_slave_addr;
531
532         if (num_of_slaves < 1)
533                 return num_tx_total;
534
535         memcpy(slaves, internals->tlb_slaves_order,
536                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
537
538
539         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
540
541         if (nb_pkts > 3) {
542                 for (i = 0; i < 3; i++)
543                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
544         }
545
546         for (i = 0; i < num_of_slaves; i++) {
547                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
548                 for (j = num_tx_total; j < nb_pkts; j++) {
549                         if (j + 3 < nb_pkts)
550                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
551
552                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
553                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
554                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
555                 }
556
557                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
558                                 bufs + num_tx_total, nb_pkts - num_tx_total);
559
560                 if (num_tx_total == nb_pkts)
561                         break;
562         }
563
564         return num_tx_total;
565 }
566
567 void
568 bond_tlb_disable(struct bond_dev_private *internals)
569 {
570         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
571 }
572
573 void
574 bond_tlb_enable(struct bond_dev_private *internals)
575 {
576         bond_ethdev_update_tlb_slave_cb(internals);
577 }
578
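/* TX burst for adaptive load balancing (mode 6): ARP packets are assigned to
 * slaves via the ALB client table (with the source MAC rewritten to the
 * chosen slave's), pending ARP update packets are generated from that table
 * when ntt is set, and all other traffic falls through to the TLB policy. */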
579 static uint16_t
580 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
581 {
582         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
583         struct bond_dev_private *internals = bd_tx_q->dev_private;
584
585         struct ether_hdr *eth_h;
586         uint16_t ether_type, offset;
587
588         struct client_data *client_info;
589
590         /*
591          * We create transmit buffers for every slave plus one additional buffer
592          * for packets sent via TLB. In the worst case every packet is sent on a
593          * single port.
594          */
594         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
595         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
596
597         /*
598          * We create separate transmit buffers for update packets as they won't
599          * be counted in num_tx_total.
600          */
601         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
602         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
603
604         struct rte_mbuf *upd_pkt;
605         size_t pkt_size;
606
607         uint16_t num_send, num_not_send = 0;
608         uint16_t num_tx_total = 0;
609         uint8_t slave_idx;
610
611         int i, j;
612
613         /* Search the tx buffer for ARP packets and hand them to the ALB logic */
614         for (i = 0; i < nb_pkts; i++) {
615                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
616                 ether_type = eth_h->ether_type;
617                 offset = get_vlan_offset(eth_h, &ether_type);
618
619                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
620                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
621
622                         /* Change src mac in eth header */
623                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
624
625                         /* Add packet to slave tx buffer */
626                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
627                         slave_bufs_pkts[slave_idx]++;
628                 } else {
629                         /* If packet is not ARP, send it with TLB policy */
630                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
631                                         bufs[i];
632                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
633                 }
634         }
635
636         /* Update connected client ARP tables */
637         if (internals->mode6.ntt) {
638                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
639                         client_info = &internals->mode6.client_table[i];
640
641                         if (client_info->in_use) {
642                                 /* Allocate new packet to send ARP update on current slave */
643                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
644                                 if (upd_pkt == NULL) {
645                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
646                                         continue;
647                                 }
648                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
649                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
650                                 upd_pkt->data_len = pkt_size;
651                                 upd_pkt->pkt_len = pkt_size;
652
653                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
654                                                 internals);
655
656                                 /* Add packet to update tx buffer */
657                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
658                                 update_bufs_pkts[slave_idx]++;
659                         }
660                 }
661                 internals->mode6.ntt = 0;
662         }
663
664         /* Send ARP packets on proper slaves */
665         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
666                 if (slave_bufs_pkts[i] > 0) {
667                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
668                                         slave_bufs[i], slave_bufs_pkts[i]);
669                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
670                                 bufs[nb_pkts - 1 - num_not_send - j] =
671                                                 slave_bufs[i][nb_pkts - 1 - j];
672                         }
673
674                         num_tx_total += num_send;
675                         num_not_send += slave_bufs_pkts[i] - num_send;
676                 }
677         }
678
679         /* Send update packets on proper slaves */
680         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
681                 if (update_bufs_pkts[i] > 0) {
682                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
683                                         update_bufs_pkts[i]);
684                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
685                                 rte_pktmbuf_free(update_bufs[i][j]);
686                         }
687                 }
688         }
689
690         /* Send non-ARP packets using tlb policy */
691         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
692                 num_send = bond_ethdev_tx_burst_tlb(queue,
693                                 slave_bufs[RTE_MAX_ETHPORTS],
694                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
695
696                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
697                         bufs[nb_pkts - 1 - num_not_send - j] =
698                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
699                 }
700
701                 num_tx_total += num_send;
702                 num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
703         }
704
705         return num_tx_total;
706 }
707
708 static uint16_t
709 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
710                 uint16_t nb_pkts)
711 {
712         struct bond_dev_private *internals;
713         struct bond_tx_queue *bd_tx_q;
714
715         uint8_t num_of_slaves;
716         uint8_t slaves[RTE_MAX_ETHPORTS];
717
718         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
719
720         int i, op_slave_id;
721
722         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
723         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
724
725         bd_tx_q = (struct bond_tx_queue *)queue;
726         internals = bd_tx_q->dev_private;
727
728         /* Copy slave list to protect against slave up/down changes during tx
729          * bursting */
730         num_of_slaves = internals->active_slave_count;
731         memcpy(slaves, internals->active_slaves,
732                         sizeof(internals->active_slaves[0]) * num_of_slaves);
733
734         if (num_of_slaves < 1)
735                 return num_tx_total;
736
737         /* Populate each slave's mbuf array with the packets to be sent on it */
738         for (i = 0; i < nb_pkts; i++) {
739                 /* Select output slave using hash based on xmit policy */
740                 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
741
742                 /* Populate slave mbuf arrays with mbufs for that slave */
743                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
744         }
745
746         /* Send packet burst on each slave device */
747         for (i = 0; i < num_of_slaves; i++) {
748                 if (slave_nb_pkts[i] > 0) {
749                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
750                                         slave_bufs[i], slave_nb_pkts[i]);
751
752                         /* if tx burst fails, move unsent packets to the end of bufs */
753                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
754                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
755
756                                 tx_fail_total += slave_tx_fail_count;
757                                 memcpy(&bufs[nb_pkts - tx_fail_total],
758                                                 &slave_bufs[i][num_tx_slave],
759                                                 slave_tx_fail_count * sizeof(bufs[0]));
760                         }
761
762                         num_tx_total += num_tx_slave;
763                 }
764         }
765
766         return num_tx_total;
767 }
768
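/* TX burst for 802.3ad (mode 4): slow protocol packets queued by the state
 * machines on each slave's tx_ring are sent first and are not counted in the
 * return value; data packets are hashed only across slaves currently in
 * DISTRIBUTING state. */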
769 static uint16_t
770 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
771                 uint16_t nb_pkts)
772 {
773         struct bond_dev_private *internals;
774         struct bond_tx_queue *bd_tx_q;
775
776         uint8_t num_of_slaves;
777         uint8_t slaves[RTE_MAX_ETHPORTS];
778          /* positions in slaves[], not port IDs */
779         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
780         uint8_t distributing_count;
781
782         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
783         uint16_t i, j, op_slave_idx;
784         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
785
786         /* Allocate extra slots for slow protocol packets in 802.3ad mode. */
787         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
788         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
789
790         /* Total number of packets in slave_bufs */
791         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
792         /* Number of slow packets placed in each slave's buffer */
793         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
794
795         bd_tx_q = (struct bond_tx_queue *)queue;
796         internals = bd_tx_q->dev_private;
797
798         /* Copy slave list to protect against slave up/down changes during tx
799          * bursting */
800         num_of_slaves = internals->active_slave_count;
801         if (num_of_slaves < 1)
802                 return num_tx_total;
803
804         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
805
806         distributing_count = 0;
807         for (i = 0; i < num_of_slaves; i++) {
808                 struct port *port = &mode_8023ad_ports[slaves[i]];
809
810                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
811                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
812                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
813
814                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
815                         slave_bufs[i][j] = slow_pkts[j];
816
817                 if (ACTOR_STATE(port, DISTRIBUTING))
818                         distributing_offsets[distributing_count++] = i;
819         }
820
821         if (likely(distributing_count > 0)) {
822                 /* Populate each slave's mbuf array with the packets to be sent on it */
823                 for (i = 0; i < nb_pkts; i++) {
824                         /* Select output slave using hash based on xmit policy */
825                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
826
827                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
828                          * slaves that are currently distributing. */
829                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
830                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
831                         slave_nb_pkts[slave_offset]++;
832                 }
833         }
834
835         /* Send packet burst on each slave device */
836         for (i = 0; i < num_of_slaves; i++) {
837                 if (slave_nb_pkts[i] == 0)
838                         continue;
839
840                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
841                                 slave_bufs[i], slave_nb_pkts[i]);
842
843                 /* If the tx burst falls short, drop the unsent slow packets */
844                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
845                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
846
847                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
848                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
849
850                 /* If tx burst fails, move unsent packets to the end of bufs */
851                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
852                         uint16_t j = nb_pkts - num_tx_fail_total;
853                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
854                                 bufs[j] = slave_bufs[i][num_tx_slave];
855                 }
856         }
857
858         return num_tx_total;
859 }
860
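/* Broadcast mode (mode 3) clones each packet to every active slave by
 * bumping the mbuf reference count, so it is only compiled in when
 * RTE_MBUF_REFCNT is enabled. */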
861 #ifdef RTE_MBUF_REFCNT
862 static uint16_t
863 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
864                 uint16_t nb_pkts)
865 {
866         struct bond_dev_private *internals;
867         struct bond_tx_queue *bd_tx_q;
868
869         uint8_t tx_failed_flag = 0, num_of_slaves;
870         uint8_t slaves[RTE_MAX_ETHPORTS];
871
872         uint16_t max_nb_of_tx_pkts = 0;
873
874         int slave_tx_total[RTE_MAX_ETHPORTS];
875         int i, most_successful_tx_slave = -1;
876
877         bd_tx_q = (struct bond_tx_queue *)queue;
878         internals = bd_tx_q->dev_private;
879
880         /* Copy slave list to protect against slave up/down changes during tx
881          * bursting */
882         num_of_slaves = internals->active_slave_count;
883         memcpy(slaves, internals->active_slaves,
884                         sizeof(internals->active_slaves[0]) * num_of_slaves);
885
886         if (num_of_slaves < 1)
887                 return 0;
888
889         /* Increment reference count on mbufs */
890         for (i = 0; i < nb_pkts; i++)
891                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
892
893         /* Transmit burst on each active slave */
894         for (i = 0; i < num_of_slaves; i++) {
895                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
896                                         bufs, nb_pkts);
897
898                 if (unlikely(slave_tx_total[i] < nb_pkts))
899                         tx_failed_flag = 1;
900
901                 /* record the value and slave index for the slave which transmits the
902                  * maximum number of packets */
903                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
904                         max_nb_of_tx_pkts = slave_tx_total[i];
905                         most_successful_tx_slave = i;
906                 }
907         }
908
909         /* if slaves fail to transmit packets from burst, the calling application
910          * is not expected to know about multiple references to packets so we must
911          * handle failures of all packets except those of the most successful slave
912          */
913         if (unlikely(tx_failed_flag))
914                 for (i = 0; i < num_of_slaves; i++)
915                         if (i != most_successful_tx_slave)
916                                 while (slave_tx_total[i] < nb_pkts)
917                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
918
919         return max_nb_of_tx_pkts;
920 }
921 #endif
922
923 void
924 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
925                 struct rte_eth_link *slave_dev_link)
926 {
927         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
928         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
929
930         if (slave_dev_link->link_status &&
931                 bonded_eth_dev->data->dev_started) {
932                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
933                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
934
935                 internals->link_props_set = 1;
936         }
937 }
938
939 void
940 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
941 {
942         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
943
944         memset(&(bonded_eth_dev->data->dev_link), 0,
945                         sizeof(bonded_eth_dev->data->dev_link));
946
947         internals->link_props_set = 0;
948 }
949
950 int
951 link_properties_valid(struct rte_eth_link *bonded_dev_link,
952                 struct rte_eth_link *slave_dev_link)
953 {
954         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
955                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
956                 return -1;
957
958         return 0;
959 }
960
961 int
962 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
963 {
964         struct ether_addr *mac_addr;
965
966         if (eth_dev == NULL) {
967                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
968                 return -1;
969         }
970
971         if (dst_mac_addr == NULL) {
972                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
973                 return -1;
974         }
975
976         mac_addr = eth_dev->data->mac_addrs;
977
978         ether_addr_copy(mac_addr, dst_mac_addr);
979         return 0;
980 }
981
982 int
983 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
984 {
985         struct ether_addr *mac_addr;
986
987         if (eth_dev == NULL) {
988                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
989                 return -1;
990         }
991
992         if (new_mac_addr == NULL) {
993                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
994                 return -1;
995         }
996
997         mac_addr = eth_dev->data->mac_addrs;
998
999         /* If the new MAC is different from the current MAC then update */
1000         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1001                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1002
1003         return 0;
1004 }
1005
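/* Propagate MAC addresses to the slaves: round-robin, balance and broadcast
 * modes share the bonded MAC across all slaves; 802.3ad uses its own
 * per-slave update; the remaining modes assign the bonded MAC to the primary
 * slave and restore each other slave's persisted MAC. */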
1006 int
1007 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1008 {
1009         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1010         int i;
1011
1012         /* Update slave devices' MAC addresses */
1013         if (internals->slave_count < 1)
1014                 return -1;
1015
1016         switch (internals->mode) {
1017         case BONDING_MODE_ROUND_ROBIN:
1018         case BONDING_MODE_BALANCE:
1019 #ifdef RTE_MBUF_REFCNT
1020         case BONDING_MODE_BROADCAST:
1021 #endif
1022                 for (i = 0; i < internals->slave_count; i++) {
1023                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1024                                         bonded_eth_dev->data->mac_addrs)) {
1025                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1026                                                 internals->slaves[i].port_id);
1027                                 return -1;
1028                         }
1029                 }
1030                 break;
1031         case BONDING_MODE_8023AD:
1032                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1033                 break;
1034         case BONDING_MODE_ACTIVE_BACKUP:
1035         case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
1036         case BONDING_MODE_ALB:
1037         default:
1038                 for (i = 0; i < internals->slave_count; i++) {
1039                         if (internals->slaves[i].port_id ==
1040                                         internals->current_primary_port) {
1041                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1042                                                 bonded_eth_dev->data->mac_addrs)) {
1043                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1044                                                         internals->current_primary_port);
1045                                         return -1;
1046                                 }
1047                         } else {
1048                                 if (mac_address_set(
1049                                                 &rte_eth_devices[internals->slaves[i].port_id],
1050                                                 &internals->slaves[i].persisted_mac_addr)) {
1051                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1052                                                         internals->slaves[i].port_id);
1053                                         return -1;
1054                                 }
1055                         }
1056                 }
1057         }
1058
1059         return 0;
1060 }
1061
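/* Install the RX/TX burst handlers that implement the requested bonding
 * mode. An application normally selects the mode through the public API,
 * e.g. (illustrative sketch only, see rte_eth_bond.h):
 *
 *     int port_id = rte_eth_bond_create("bond0", BONDING_MODE_ALB,
 *                     rte_socket_id());
 *     if (port_id >= 0)
 *             rte_eth_bond_slave_add(port_id, slave_port_id);
 *
 * which ends up invoking bond_ethdev_mode_set() below. */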
1062 int
1063 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1064 {
1065         struct bond_dev_private *internals;
1066
1067         internals = eth_dev->data->dev_private;
1068
1069         switch (mode) {
1070         case BONDING_MODE_ROUND_ROBIN:
1071                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1072                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1073                 break;
1074         case BONDING_MODE_ACTIVE_BACKUP:
1075                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1076                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1077                 break;
1078         case BONDING_MODE_BALANCE:
1079                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1080                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1081                 break;
1082 #ifdef RTE_MBUF_REFCNT
1083         case BONDING_MODE_BROADCAST:
1084                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1085                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1086                 break;
1087 #endif
1088         case BONDING_MODE_8023AD:
1089                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1090                         return -1;
1091
1092                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1093                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1094                 RTE_LOG(WARNING, PMD,
1095                                 "Using mode 4, it is necessary to invoke TX and RX bursts "
1096                                 "at least every 100ms.\n");
1097                 break;
1098         case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
1099                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1100                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1101                 break;
1102         case BONDING_MODE_ALB:
1103                 if (bond_mode_alb_enable(eth_dev) != 0)
1104                         return -1;
1105
1106                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1107                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1108                 break;
1109         default:
1110                 return -1;
1111         }
1112
1113         internals->mode = mode;
1114
1115         return 0;
1116 }
1117
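/* Stop a slave, mirror the bonded device's queue configuration onto it (one
 * RX/TX queue pair per bonded queue) and restart it. */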
1118 int
1119 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1120                 struct rte_eth_dev *slave_eth_dev)
1121 {
1122         struct bond_rx_queue *bd_rx_q;
1123         struct bond_tx_queue *bd_tx_q;
1124
1125         int errval, q_id;
1126
1127         /* Stop slave */
1128         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1129
1130         /* Enable interrupts on slave device if supported */
1131         if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
1132                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1133
1134         /* Configure device */
1135         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1136                         bonded_eth_dev->data->nb_rx_queues,
1137                         bonded_eth_dev->data->nb_tx_queues,
1138                         &(slave_eth_dev->data->dev_conf));
1139         if (errval != 0) {
1140                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1141                                 slave_eth_dev->data->port_id, errval);
1142                 return errval;
1143         }
1144
1145         /* Setup Rx Queues */
1146         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1147                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1148
1149                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1150                                 bd_rx_q->nb_rx_desc,
1151                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1152                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1153                 if (errval != 0) {
1154                         RTE_BOND_LOG(ERR,
1155                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1156                                         slave_eth_dev->data->port_id, q_id, errval);
1157                         return errval;
1158                 }
1159         }
1160
1161         /* Setup Tx Queues */
1162         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1163                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1164
1165                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1166                                 bd_tx_q->nb_tx_desc,
1167                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1168                                 &bd_tx_q->tx_conf);
1169                 if (errval != 0) {
1170                         RTE_BOND_LOG(ERR,
1171                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1172                                         slave_eth_dev->data->port_id, q_id, errval);
1173                         return errval;
1174                 }
1175         }
1176
1177         /* Start device */
1178         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1179         if (errval != 0) {
1180                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1181                                 slave_eth_dev->data->port_id, errval);
1182                 return -1;
1183         }
1184
1185         return 0;
1186 }
1187
1188 void
1189 slave_remove(struct bond_dev_private *internals,
1190                 struct rte_eth_dev *slave_eth_dev)
1191 {
1192         uint8_t i;
1193
1194         for (i = 0; i < internals->slave_count; i++)
1195                 if (internals->slaves[i].port_id ==
1196                                 slave_eth_dev->data->port_id)
1197                         break;
1198
1199         if (i < (internals->slave_count - 1))
1200                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1201                                 sizeof(internals->slaves[0]) *
1202                                 (internals->slave_count - i - 1));
1203
1204         internals->slave_count--;
1205 }
1206
1207 static void
1208 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1209
1210 void
1211 slave_add(struct bond_dev_private *internals,
1212                 struct rte_eth_dev *slave_eth_dev)
1213 {
1214         struct bond_slave_details *slave_details =
1215                         &internals->slaves[internals->slave_count];
1216
1217         slave_details->port_id = slave_eth_dev->data->port_id;
1218         slave_details->last_link_status = 0;
1219
1220         /* If the slave device doesn't support interrupts then we need to enable
1221          * polling to monitor its link status */
1222         if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
1223                 slave_details->link_status_poll_enabled = 1;
1224
1225                 if (!internals->link_status_polling_enabled) {
1226                         internals->link_status_polling_enabled = 1;
1227
1228                         rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1229                                         bond_ethdev_slave_link_status_change_monitor,
1230                                         (void *)&rte_eth_devices[internals->port_id]);
1231                 }
1232         }
1233
1234         slave_details->link_status_wait_to_complete = 0;
1235         /* Persist the slave's original MAC address so it can be restored later */
1236         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1237                         sizeof(struct ether_addr));
1238 }
1239
1240 void
1241 bond_ethdev_primary_set(struct bond_dev_private *internals,
1242                 uint8_t slave_port_id)
1243 {
1244         int i;
1245
1246         if (internals->active_slave_count < 1)
1247                 internals->current_primary_port = slave_port_id;
1248         else
1249                 /* Search bonded device slave ports for new proposed primary port */
1250                 for (i = 0; i < internals->active_slave_count; i++) {
1251                         if (internals->active_slaves[i] == slave_port_id)
1252                                 internals->current_primary_port = slave_port_id;
1253                 }
1254 }
1255
1256 static void
1257 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1258
1259 static int
1260 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1261 {
1262         struct bond_dev_private *internals;
1263         int i;
1264
1265         /* slave eth dev will be started by bonded device */
1266         if (valid_bonded_ethdev(eth_dev)) {
1267                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1268                                 eth_dev->data->port_id);
1269                 return -1;
1270         }
1271
1272         eth_dev->data->dev_link.link_status = 0;
1273         eth_dev->data->dev_started = 1;
1274
1275         internals = eth_dev->data->dev_private;
1276
1277         if (internals->slave_count == 0) {
1278                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1279                 return -1;
1280         }
1281
1282         if (internals->user_defined_mac == 0) {
1283                 struct ether_addr *new_mac_addr = NULL;
1284
1285                 for (i = 0; i < internals->slave_count; i++)
1286                         if (internals->slaves[i].port_id == internals->primary_port)
1287                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1288
1289                 if (new_mac_addr == NULL)
1290                         return -1;
1291
1292                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1293                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1294                                         eth_dev->data->port_id);
1295                         return -1;
1296                 }
1297         }
1298
1299         /* Update all slave devices' MACs */
1300         if (mac_address_slaves_update(eth_dev) != 0)
1301                 return -1;
1302
1303         /* If the bonded device is configured in promiscuous mode then re-apply the config */
1304         if (internals->promiscuous_en)
1305                 bond_ethdev_promiscuous_enable(eth_dev);
1306
1307         /* Reconfigure each slave device if starting bonded device */
1308         for (i = 0; i < internals->slave_count; i++) {
1309                 if (slave_configure(eth_dev,
1310                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1311                         RTE_BOND_LOG(ERR,
1312                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1313                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1314                         return -1;
1315                 }
1316         }
1317
1318         if (internals->user_defined_primary_port)
1319                 bond_ethdev_primary_set(internals, internals->primary_port);
1320
1321         if (internals->mode == BONDING_MODE_8023AD)
1322                 bond_mode_8023ad_start(eth_dev);
1323
1324         if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING ||
1325                         internals->mode == BONDING_MODE_ALB)
1326                 bond_tlb_enable(internals);
1327
1328         return 0;
1329 }
1330
1331 static void
1332 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1333 {
1334         struct bond_dev_private *internals = eth_dev->data->dev_private;
1335         uint8_t i;
1336
1337         if (internals->mode == BONDING_MODE_8023AD) {
1338                 struct port *port;
1339                 void *pkt = NULL;
1340
1341                 bond_mode_8023ad_stop(eth_dev);
1342
1343                 /* Discard all messages to/from mode 4 state machines */
1344                 for (i = 0; i < internals->slave_count; i++) {
1345                         port = &mode_8023ad_ports[internals->slaves[i].port_id];
1346
1347                         RTE_VERIFY(port->rx_ring != NULL);
1348                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1349                                 rte_pktmbuf_free(pkt);
1350
1351                         RTE_VERIFY(port->tx_ring != NULL);
1352                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1353                                 rte_pktmbuf_free(pkt);
1354                 }
1355         }
1356
1357         if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING ||
1358                         internals->mode == BONDING_MODE_ALB) {
1359                 bond_tlb_disable(internals);
1360                 for (i = 0; i < internals->active_slave_count; i++)
1361                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1362         }
1363
1364         internals->active_slave_count = 0;
1365         internals->link_status_polling_enabled = 0;
1366
1367         eth_dev->data->dev_link.link_status = 0;
1368         eth_dev->data->dev_started = 0;
1369 }
1370
1371 static void
1372 bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
1373 {
1374 }
1375
1376 /* forward declaration */
1377 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1378
1379 static void
1380 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1381 {
1382         struct bond_dev_private *internals = dev->data->dev_private;
1383
1384         dev_info->driver_name = driver_name;
1385         dev_info->max_mac_addrs = 1;
1386
1387         dev_info->max_rx_pktlen = (uint32_t)2048;
1388
1389         dev_info->max_rx_queues = (uint16_t)128;
1390         dev_info->max_tx_queues = (uint16_t)512;
1391
1392         dev_info->min_rx_bufsize = 0;
1393         dev_info->pci_dev = dev->pci_dev;
1394
1395         dev_info->rx_offload_capa = internals->rx_offload_capa;
1396         dev_info->tx_offload_capa = internals->tx_offload_capa;
1397 }
1398
1399 static int
1400 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1401                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1402                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1403 {
1404         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1405                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1406                                         0, dev->pci_dev->numa_node);
1407         if (bd_rx_q == NULL)
1408                 return -1;
1409
1410         bd_rx_q->queue_id = rx_queue_id;
1411         bd_rx_q->dev_private = dev->data->dev_private;
1412
1413         bd_rx_q->nb_rx_desc = nb_rx_desc;
1414
1415         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1416         bd_rx_q->mb_pool = mb_pool;
1417
1418         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1419
1420         return 0;
1421 }
1422
1423 static int
1424 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1425                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1426                 const struct rte_eth_txconf *tx_conf)
1427 {
1428         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1429                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1430                                         0, dev->pci_dev->numa_node);
1431
1432         if (bd_tx_q == NULL)
1433                 return -1;
1434
1435         bd_tx_q->queue_id = tx_queue_id;
1436         bd_tx_q->dev_private = dev->data->dev_private;
1437
1438         bd_tx_q->nb_tx_desc = nb_tx_desc;
1439         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1440
1441         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1442
1443         return 0;
1444 }
1445
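/*
 * Illustrative sketch (not part of the driver): the queue setup/release
 * callbacks above are reached through the generic ethdev API. A minimal
 * sketch for one rx/tx queue pair, assuming a hypothetical bonded_port_id
 * and an existing mempool mbuf_pool:
 *
 *	struct rte_eth_conf port_conf;
 *
 *	memset(&port_conf, 0, sizeof(port_conf));
 *	if (rte_eth_dev_configure(bonded_port_id, 1, 1, &port_conf) != 0 ||
 *			rte_eth_rx_queue_setup(bonded_port_id, 0, 128,
 *					rte_socket_id(), NULL, mbuf_pool) != 0 ||
 *			rte_eth_tx_queue_setup(bonded_port_id, 0, 512,
 *					rte_socket_id(), NULL) != 0)
 *		rte_exit(EXIT_FAILURE, "Bonded port queue setup failed\n");
 */
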
1446 static void
1447 bond_ethdev_rx_queue_release(void *queue)
1448 {
1449         if (queue == NULL)
1450                 return;
1451
1452         rte_free(queue);
1453 }
1454
1455 static void
1456 bond_ethdev_tx_queue_release(void *queue)
1457 {
1458         if (queue == NULL)
1459                 return;
1460
1461         rte_free(queue);
1462 }
1463
1464 static void
1465 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1466 {
1467         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1468         struct bond_dev_private *internals;
1469
1470         /* Default value for polling_slave_found is true, as we don't want to
1471          * disable the polling thread if we cannot get the lock */
1472         int i, polling_slave_found = 1;
1473
1474         if (cb_arg == NULL)
1475                 return;
1476
1477         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1478         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1479
1480         if (!bonded_ethdev->data->dev_started ||
1481                 !internals->link_status_polling_enabled)
1482                 return;
1483
1484         /* If the device is currently being configured then don't check the
1485          * slaves' link status; wait until the next period */
1486         if (rte_spinlock_trylock(&internals->lock)) {
1487                 if (internals->slave_count > 0)
1488                         polling_slave_found = 0;
1489
1490                 for (i = 0; i < internals->slave_count; i++) {
1491                         if (!internals->slaves[i].link_status_poll_enabled)
1492                                 continue;
1493
1494                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1495                         polling_slave_found = 1;
1496
1497                         /* Update slave link status */
1498                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1499                                         internals->slaves[i].link_status_wait_to_complete);
1500
1501                         /* if link status has changed since last checked then call lsc
1502                          * event callback */
1503                         if (slave_ethdev->data->dev_link.link_status !=
1504                                         internals->slaves[i].last_link_status) {
1505                                 internals->slaves[i].last_link_status =
1506                                                 slave_ethdev->data->dev_link.link_status;
1507
1508                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1509                                                 RTE_ETH_EVENT_INTR_LSC,
1510                                                 &bonded_ethdev->data->port_id);
1511                         }
1512                 }
1513                 rte_spinlock_unlock(&internals->lock);
1514         }
1515
1516         if (polling_slave_found)
1517                 /* Set alarm to continue monitoring link status of slave ethdevs */
1518                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1519                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1520 }
1521
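/*
 * Note: rte_eal_alarm_set() arms a one-shot alarm, which is why the monitor
 * above re-arms itself on every invocation. A minimal sketch of the same
 * pattern, with a hypothetical callback and a 10 ms period:
 *
 *	static void periodic_cb(void *arg)
 *	{
 *		// ... periodic work ...
 *		rte_eal_alarm_set(10 * 1000, periodic_cb, arg);	// re-arm
 *	}
 *
 *	rte_eal_alarm_set(10 * 1000, periodic_cb, NULL);	// initial arming
 */
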
1522 static int
1523 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1524                 int wait_to_complete)
1525 {
1526         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1527
1528         if (!bonded_eth_dev->data->dev_started ||
1529                 internals->active_slave_count == 0) {
1530                 bonded_eth_dev->data->dev_link.link_status = 0;
1531                 return 0;
1532         } else {
1533                 struct rte_eth_dev *slave_eth_dev;
1534                 int i, link_up = 0;
1535
1536                 for (i = 0; i < internals->active_slave_count; i++) {
1537                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1538
1539                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1540                                         wait_to_complete);
1541                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1542                                 link_up = 1;
1543                                 break;
1544                         }
1545                 }
1546
1547                 bonded_eth_dev->data->dev_link.link_status = link_up;
1548         }
1549
1550         return 0;
1551 }
1552
1553 static void
1554 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1555 {
1556         struct bond_dev_private *internals = dev->data->dev_private;
1557         struct rte_eth_stats slave_stats;
1558
1559         int i;
1560
1561         /* clear bonded stats before populating from slaves */
1562         memset(stats, 0, sizeof(*stats));
1563
1564         for (i = 0; i < internals->slave_count; i++) {
1565                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1566
1567                 stats->ipackets += slave_stats.ipackets;
1568                 stats->opackets += slave_stats.opackets;
1569                 stats->ibytes += slave_stats.ibytes;
1570                 stats->obytes += slave_stats.obytes;
1571                 stats->ierrors += slave_stats.ierrors;
1572                 stats->oerrors += slave_stats.oerrors;
1573                 stats->imcasts += slave_stats.imcasts;
1574                 stats->rx_nombuf += slave_stats.rx_nombuf;
1575                 stats->fdirmatch += slave_stats.fdirmatch;
1576                 stats->fdirmiss += slave_stats.fdirmiss;
1577                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1578                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1579                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1580                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1581         }
1582 }
1583
1584 static void
1585 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1586 {
1587         struct bond_dev_private *internals = dev->data->dev_private;
1588         int i;
1589
1590         for (i = 0; i < internals->slave_count; i++)
1591                 rte_eth_stats_reset(internals->slaves[i].port_id);
1592 }
1593
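/*
 * Illustrative sketch (not part of the driver): statistics on the bonded
 * port are the per-field sums of its slaves' statistics, recomputed on
 * every call. A hypothetical caller:
 *
 *	struct rte_eth_stats stats;
 *
 *	rte_eth_stats_get(bonded_port_id, &stats);
 *	printf("rx %" PRIu64 " tx %" PRIu64 "\n",
 *			stats.ipackets, stats.opackets);
 *	rte_eth_stats_reset(bonded_port_id);	// resets every slave's counters
 */
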
1594 static void
1595 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1596 {
1597         struct bond_dev_private *internals = eth_dev->data->dev_private;
1598         int i;
1599
1600         internals->promiscuous_en = 1;
1601
1602         switch (internals->mode) {
1603         /* Promiscuous mode is propagated to all slaves */
1604         case BONDING_MODE_ROUND_ROBIN:
1605         case BONDING_MODE_BALANCE:
1606 #ifdef RTE_MBUF_REFCNT
1607         case BONDING_MODE_BROADCAST:
1608 #endif
1609                 for (i = 0; i < internals->slave_count; i++)
1610                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1611                 break;
1612         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1613         case BONDING_MODE_8023AD:
1614                 break;
1615         /* Promiscuous mode is propagated only to primary slave */
1616         case BONDING_MODE_ACTIVE_BACKUP:
1617         case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
1618         case BONDING_MODE_ALB:
1619         default:
1620                 rte_eth_promiscuous_enable(internals->current_primary_port);
1621         }
1622 }
1623
1624 static void
1625 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1626 {
1627         struct bond_dev_private *internals = dev->data->dev_private;
1628         int i;
1629
1630         internals->promiscuous_en = 0;
1631
1632         switch (internals->mode) {
1633         /* Promiscuous mode is propagated to all slaves */
1634         case BONDING_MODE_ROUND_ROBIN:
1635         case BONDING_MODE_BALANCE:
1636 #ifdef RTE_MBUF_REFCNT
1637         case BONDING_MODE_BROADCAST:
1638 #endif
1639                 for (i = 0; i < internals->slave_count; i++)
1640                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1641                 break;
1642         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1643         case BONDING_MODE_8023AD:
1644                 break;
1645         /* Promiscuous mode is propagated only to primary slave */
1646         case BONDING_MODE_ACTIVE_BACKUP:
1647         case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
1648         case BONDING_MODE_ALB:
1649         default:
1650                 rte_eth_promiscuous_disable(internals->current_primary_port);
1651         }
1652 }
1653
1654 static void
1655 bond_ethdev_delayed_lsc_propagation(void *arg)
1656 {
1657         if (arg == NULL)
1658                 return;
1659
1660         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1661                         RTE_ETH_EVENT_INTR_LSC);
1662 }
1663
1664 void
1665 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1666                 void *param)
1667 {
1668         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1669         struct bond_dev_private *internals;
1670         struct rte_eth_link link;
1671
1672         int i, valid_slave = 0;
1673         uint8_t active_pos;
1674         uint8_t lsc_flag = 0;
1675
1676         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1677                 return;
1678
1679         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1680         slave_eth_dev = &rte_eth_devices[port_id];
1681
1682         if (valid_bonded_ethdev(bonded_eth_dev))
1683                 return;
1684
1685         internals = bonded_eth_dev->data->dev_private;
1686
1687         /* If the device isn't started don't handle interrupts */
1688         if (!bonded_eth_dev->data->dev_started)
1689                 return;
1690
1691         /* verify that port_id is a valid slave of bonded port */
1692         for (i = 0; i < internals->slave_count; i++) {
1693                 if (internals->slaves[i].port_id == port_id) {
1694                         valid_slave = 1;
1695                         break;
1696                 }
1697         }
1698
1699         if (!valid_slave)
1700                 return;
1701
1702         /* Search for port in active port list */
1703         active_pos = find_slave_by_id(internals->active_slaves,
1704                         internals->active_slave_count, port_id);
1705
1706         rte_eth_link_get_nowait(port_id, &link);
1707         if (link.link_status) {
1708                 if (active_pos < internals->active_slave_count)
1709                         return;
1710
1711                 /* if no active slave ports then set this port to be primary port */
1712                 if (internals->active_slave_count < 1) {
1713                         /* If first active slave, then change link status */
1714                         bonded_eth_dev->data->dev_link.link_status = 1;
1715                         internals->current_primary_port = port_id;
1716                         lsc_flag = 1;
1717
1718                         mac_address_slaves_update(bonded_eth_dev);
1719
1720                         /* Inherit eth dev link properties from first active slave */
1721                         link_properties_set(bonded_eth_dev,
1722                                         &(slave_eth_dev->data->dev_link));
1723                 }
1724
1725                 activate_slave(bonded_eth_dev, port_id);
1726
1727                 /* If user has defined the primary port then default to using it */
1728                 if (internals->user_defined_primary_port &&
1729                                 internals->primary_port == port_id)
1730                         bond_ethdev_primary_set(internals, port_id);
1731         } else {
1732                 if (active_pos == internals->active_slave_count)
1733                         return;
1734
1735                 /* Remove from active slave list */
1736                 deactivate_slave(bonded_eth_dev, port_id);
1737
1738                 /* No active slaves, change link status to down and reset other
1739                  * link properties */
1740                 if (internals->active_slave_count < 1) {
1741                         lsc_flag = 1;
1742                         bonded_eth_dev->data->dev_link.link_status = 0;
1743
1744                         link_properties_reset(bonded_eth_dev);
1745                 }
1746
1747                 /* Update primary id, take first active slave from list or, if none
1748                  * is available, fall back to the configured primary port */
1749                 if (port_id == internals->current_primary_port) {
1750                         if (internals->active_slave_count > 0)
1751                                 bond_ethdev_primary_set(internals,
1752                                                 internals->active_slaves[0]);
1753                         else
1754                                 internals->current_primary_port = internals->primary_port;
1755                 }
1756         }
1757
1758         if (lsc_flag) {
1759                 /* Cancel any possible outstanding interrupts if delays are enabled */
1760                 if (internals->link_up_delay_ms > 0 ||
1761                         internals->link_down_delay_ms > 0)
1762                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
1763                                         bonded_eth_dev);
1764
1765                 if (bonded_eth_dev->data->dev_link.link_status) {
1766                         if (internals->link_up_delay_ms > 0)
1767                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
1768                                                 bond_ethdev_delayed_lsc_propagation,
1769                                                 (void *)bonded_eth_dev);
1770                         else
1771                                 _rte_eth_dev_callback_process(bonded_eth_dev,
1772                                                 RTE_ETH_EVENT_INTR_LSC);
1773
1774                 } else {
1775                         if (internals->link_down_delay_ms > 0)
1776                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
1777                                                 bond_ethdev_delayed_lsc_propagation,
1778                                                 (void *)bonded_eth_dev);
1779                         else
1780                                 _rte_eth_dev_callback_process(bonded_eth_dev,
1781                                                 RTE_ETH_EVENT_INTR_LSC);
1782                 }
1783         }
1784 }
1785
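/*
 * Illustrative sketch (not part of the driver): an application observes
 * link changes on the bonded port by registering a callback through the
 * standard ethdev mechanism; the _rte_eth_dev_callback_process() calls
 * above are what ultimately invoke it. Names below are hypothetical.
 *
 *	static void app_lsc_cb(uint8_t port_id, enum rte_eth_event_type type,
 *			void *cb_arg __rte_unused)
 *	{
 *		struct rte_eth_link link;
 *
 *		if (type != RTE_ETH_EVENT_INTR_LSC)
 *			return;
 *		rte_eth_link_get_nowait(port_id, &link);
 *		printf("bonded port %d link %s\n", port_id,
 *				link.link_status ? "up" : "down");
 *	}
 *
 *	rte_eth_dev_callback_register(bonded_port_id,
 *			RTE_ETH_EVENT_INTR_LSC, app_lsc_cb, NULL);
 */
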
1786 struct eth_dev_ops default_dev_ops = {
1787                 .dev_start = bond_ethdev_start,
1788                 .dev_stop = bond_ethdev_stop,
1789                 .dev_close = bond_ethdev_close,
1790                 .dev_configure = bond_ethdev_configure,
1791                 .dev_infos_get = bond_ethdev_info,
1792                 .rx_queue_setup = bond_ethdev_rx_queue_setup,
1793                 .tx_queue_setup = bond_ethdev_tx_queue_setup,
1794                 .rx_queue_release = bond_ethdev_rx_queue_release,
1795                 .tx_queue_release = bond_ethdev_tx_queue_release,
1796                 .link_update = bond_ethdev_link_update,
1797                 .stats_get = bond_ethdev_stats_get,
1798                 .stats_reset = bond_ethdev_stats_reset,
1799                 .promiscuous_enable = bond_ethdev_promiscuous_enable,
1800                 .promiscuous_disable = bond_ethdev_promiscuous_disable
1801 };
1802
1803 static int
1804 bond_init(const char *name, const char *params)
1805 {
1806         struct bond_dev_private *internals;
1807         struct rte_kvargs *kvlist;
1808         uint8_t bonding_mode, socket_id;
1809         int  arg_count, port_id;
1810
1811         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
1812
1813         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
1814         if (kvlist == NULL)
1815                 return -1;
1816
1817         /* Parse link bonding mode */
1818         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
1819                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
1820                                 &bond_ethdev_parse_slave_mode_kvarg,
1821                                 &bonding_mode) != 0) {
1822                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
1823                                         name);
1824                         goto parse_error;
1825                 }
1826         } else {
1827                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
1828                                 "device %s\n", name);
1829                 goto parse_error;
1830         }
1831
1832         /* Parse socket id to create bonding device on */
1833         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
1834         if (arg_count == 1) {
1835                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
1836                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
1837                                 != 0) {
1838                         RTE_LOG(ERR, EAL, "Invalid socket ID specified for "
1839                                         "bonded device %s\n", name);
1840                         goto parse_error;
1841                 }
1842         } else if (arg_count > 1) {
1843                 RTE_LOG(ERR, EAL, "Socket ID can be specified only once for "
1844                                 "bonded device %s\n", name);
1845                 goto parse_error;
1846         } else {
1847                 socket_id = rte_socket_id();
1848         }
1849
1850         /* Create link bonding eth device */
1851         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
1852         if (port_id < 0) {
1853                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
1854                                 "socket %u.\n", name, bonding_mode, socket_id);
1855                 goto parse_error;
1856         }
1857         internals = rte_eth_devices[port_id].data->dev_private;
1858         internals->kvlist = kvlist;
1859
1860         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
1861                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
1862         return 0;
1863
1864 parse_error:
1865         rte_kvargs_free(kvlist);
1866
1867         return -1;
1868 }
1869
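/*
 * Illustrative sketch: bond_init() runs when the bonded device is created
 * from EAL devargs rather than through the API, e.g. for mode 2 on socket 0
 * (kvarg keys as defined in rte_eth_bond_private.h):
 *
 *	./app -c 0xf -n 4 --vdev 'eth_bond0,mode=2,socket_id=0' -- ...
 *
 * Only the mode and socket id are consumed here; the remaining kvargs are
 * saved on internals->kvlist and processed later by bond_ethdev_configure().
 */
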
1870 /* This part resolves the slave port ids after all the other pdevs and vdevs
1871  * have been allocated */
1872 static int
1873 bond_ethdev_configure(struct rte_eth_dev *dev)
1874 {
1875         char *name = dev->data->name;
1876         struct bond_dev_private *internals = dev->data->dev_private;
1877         struct rte_kvargs *kvlist = internals->kvlist;
1878         int arg_count, port_id = dev - rte_eth_devices;
1879
1880         /*
1881          * if no kvlist, it means that this bonded device has been created
1882          * through the bonding api.
1883          */
1884         if (!kvlist)
1885                 return 0;
1886
1887         /* Parse MAC address for bonded device */
1888         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
1889         if (arg_count == 1) {
1890                 struct ether_addr bond_mac;
1891
1892                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
1893                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
1894                         RTE_LOG(INFO, EAL, "Invalid MAC address for bonded device %s\n",
1895                                         name);
1896                         return -1;
1897                 }
1898
1899                 /* Set MAC address */
1900                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
1901                         RTE_LOG(ERR, EAL,
1902                                         "Failed to set MAC address on bonded device %s\n",
1903                                         name);
1904                         return -1;
1905                 }
1906         } else if (arg_count > 1) {
1907                 RTE_LOG(ERR, EAL,
1908                                 "MAC address can be specified only once for bonded device %s\n",
1909                                 name);
1910                 return -1;
1911         }
1912
1913         /* Parse/set balance mode transmit policy */
1914         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
1915         if (arg_count == 1) {
1916                 uint8_t xmit_policy;
1917
1918                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
1919                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
1920                                                 0) {
1921                         RTE_LOG(INFO, EAL,
1922                                         "Invalid xmit policy specified for bonded device %s\n",
1923                                         name);
1924                         return -1;
1925                 }
1926
1927                 /* Set balance mode transmit policy */
1928                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
1929                         RTE_LOG(ERR, EAL,
1930                                         "Failed to set balance xmit policy on bonded device %s\n",
1931                                         name);
1932                         return -1;
1933                 }
1934         } else if (arg_count > 1) {
1935                 RTE_LOG(ERR, EAL,
1936                                 "Transmit policy can be specified only once for bonded device"
1937                                 " %s\n", name);
1938                 return -1;
1939         }
1940
1941         /* Parse/add slave ports to bonded device */
1942         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
1943                 struct bond_ethdev_slave_ports slave_ports;
1944                 unsigned i;
1945
1946                 memset(&slave_ports, 0, sizeof(slave_ports));
1947
1948                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
1949                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
1950                         RTE_LOG(ERR, EAL,
1951                                         "Failed to parse slave ports for bonded device %s\n",
1952                                         name);
1953                         return -1;
1954                 }
1955
1956                 for (i = 0; i < slave_ports.slave_count; i++) {
1957                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
1958                                 RTE_LOG(ERR, EAL,
1959                                                 "Failed to add port %d as slave to bonded device %s\n",
1960                                                 slave_ports.slaves[i], name);
1961                         }
1962                 }
1963
1964         } else {
1965                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
1966                 return -1;
1967         }
1968
1969         /* Parse/set primary slave port id */
1970         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
1971         if (arg_count == 1) {
1972                 uint8_t primary_slave_port_id;
1973
1974                 if (rte_kvargs_process(kvlist,
1975                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
1976                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
1977                                 &primary_slave_port_id) < 0) {
1978                         RTE_LOG(INFO, EAL,
1979                                         "Invalid primary slave port id specified for bonded device"
1980                                         " %s\n", name);
1981                         return -1;
1982                 }
1983
1984                 /* Set the primary slave port */
1985                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
1986                                 != 0) {
1987                         RTE_LOG(ERR, EAL,
1988                                         "Failed to set primary slave port %d on bonded device %s\n",
1989                                         primary_slave_port_id, name);
1990                         return -1;
1991                 }
1992         } else if (arg_count > 1) {
1993                 RTE_LOG(INFO, EAL,
1994                                 "Primary slave can be specified only once for bonded device"
1995                                 " %s\n", name);
1996                 return -1;
1997         }
1998
1999         /* Parse link status monitor polling interval */
2000         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2001         if (arg_count == 1) {
2002                 uint32_t lsc_poll_interval_ms;
2003
2004                 if (rte_kvargs_process(kvlist,
2005                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2006                                 &bond_ethdev_parse_time_ms_kvarg,
2007                                 &lsc_poll_interval_ms) < 0) {
2008                         RTE_LOG(INFO, EAL,
2009                                         "Invalid lsc polling interval value specified for bonded"
2010                                         " device %s\n", name);
2011                         return -1;
2012                 }
2013
2014                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2015                                 != 0) {
2016                         RTE_LOG(ERR, EAL,
2017                                         "Failed to set lsc monitor polling interval (%u ms) on"
2018                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2019                         return -1;
2020                 }
2021         } else if (arg_count > 1) {
2022                 RTE_LOG(INFO, EAL,
2023                                 "LSC polling interval can be specified only once for bonded"
2024                                 " device %s\n", name);
2025                 return -1;
2026         }
2027
2028         /* Parse link up interrupt propagation delay */
2029         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2030         if (arg_count == 1) {
2031                 uint32_t link_up_delay_ms;
2032
2033                 if (rte_kvargs_process(kvlist,
2034                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2035                                 &bond_ethdev_parse_time_ms_kvarg,
2036                                 &link_up_delay_ms) < 0) {
2037                         RTE_LOG(INFO, EAL,
2038                                         "Invalid link up propagation delay value specified for"
2039                                         " bonded device %s\n", name);
2040                         return -1;
2041                 }
2042
2043                 /* Set link up propagation delay */
2044                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2045                                 != 0) {
2046                         RTE_LOG(ERR, EAL,
2047                                         "Failed to set link up propagation delay (%u ms) on bonded"
2048                                         " device %s\n", link_up_delay_ms, name);
2049                         return -1;
2050                 }
2051         } else if (arg_count > 1) {
2052                 RTE_LOG(INFO, EAL,
2053                                 "Link up propagation delay can be specified only once for"
2054                                 " bonded device %s\n", name);
2055                 return -1;
2056         }
2057
2058         /* Parse link down interrupt propagation delay */
2059         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2060         if (arg_count == 1) {
2061                 uint32_t link_down_delay_ms;
2062
2063                 if (rte_kvargs_process(kvlist,
2064                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2065                                 &bond_ethdev_parse_time_ms_kvarg,
2066                                 &link_down_delay_ms) < 0) {
2067                         RTE_LOG(INFO, EAL,
2068                                         "Invalid link down propagation delay value specified for"
2069                                         " bonded device %s\n", name);
2070                         return -1;
2071                 }
2072
2073                 /* Set link down propagation delay */
2074                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2075                                 != 0) {
2076                         RTE_LOG(ERR, EAL,
2077                                         "Failed to set link down propagation delay (%u ms) on"
2078                                         " bonded device %s\n", link_down_delay_ms, name);
2079                         return -1;
2080                 }
2081         } else if (arg_count > 1) {
2082                 RTE_LOG(INFO, EAL,
2083                                 "Link down propagation delay can be specified only once for"
2084                                 " bonded device %s\n", name);
2085                 return -1;
2086         }
2087
2088         return 0;
2089 }
2090
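/*
 * Illustrative sketch: a fuller devargs line exercising the kvargs parsed
 * above. PCI addresses and values are hypothetical; keys as defined in
 * rte_eth_bond_private.h:
 *
 *	--vdev 'eth_bond0,mode=1,slave=0000:0a:00.0,slave=0000:0a:00.1,primary=0000:0a:00.0,up_delay=10,down_delay=50'
 */
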
2091 static struct rte_driver bond_drv = {
2092         .name = "eth_bond",
2093         .type = PMD_VDEV,
2094         .init = bond_init,
2095 };
2096
2097 PMD_REGISTER_DRIVER(bond_drv);