net/bonding: remove useless assignment
[dpdk.git] / drivers/net/bonding/rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 #include <rte_ip.h>
42 #include <rte_ip_frag.h>
43 #include <rte_devargs.h>
44 #include <rte_kvargs.h>
45 #include <rte_vdev.h>
46 #include <rte_alarm.h>
47 #include <rte_cycles.h>
48
49 #include "rte_eth_bond.h"
50 #include "rte_eth_bond_private.h"
51 #include "rte_eth_bond_8023ad_private.h"
52
53 #define REORDER_PERIOD_MS 10
54
55 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
56
57 /* Table for statistics in mode 5 TLB */
58 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
59
60 static inline size_t
61 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
62 {
63         size_t vlan_offset = 0;
64
65         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
66                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
67
68                 vlan_offset = sizeof(struct vlan_hdr);
69                 *proto = vlan_hdr->eth_proto;
70
71                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
72                         vlan_hdr = vlan_hdr + 1;
73                         *proto = vlan_hdr->eth_proto;
74                         vlan_offset += sizeof(struct vlan_hdr);
75                 }
76         }
77         return vlan_offset;
78 }
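
/*
 * Usage sketch (illustrative, not part of the driver): get_vlan_offset()
 * consumes up to two stacked VLAN tags (QinQ) and leaves *proto holding the
 * inner EtherType, so callers can locate the L3 header like this:
 *
 *     struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *     uint16_t proto = eth->ether_type;
 *     size_t off = get_vlan_offset(eth, &proto);
 *
 *     if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *             struct ipv4_hdr *ip = (struct ipv4_hdr *)
 *                             ((char *)(eth + 1) + off);
 *             ... inspect ip->src_addr, etc. ...
 *     }
 *
 * "m" is a hypothetical mbuf; the real callers below (mode6_debug(),
 * xmit_l23_hash(), xmit_l34_hash()) follow exactly this pattern.
 */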
79
80 static uint16_t
81 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
82 {
83         struct bond_dev_private *internals;
84
85         uint16_t num_rx_slave = 0;
86         uint16_t num_rx_total = 0;
87
88         int i;
89
90         /* Cast to structure containing the bonded device's port id and queue id */
91         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92
93         internals = bd_rx_q->dev_private;
94
95
96         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
97                 /* Offset of pointer to *bufs increases as packets are received
98                  * from other slaves */
99                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
100                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
101                 if (num_rx_slave) {
102                         num_rx_total += num_rx_slave;
103                         nb_pkts -= num_rx_slave;
104                 }
105         }
106
107         return num_rx_total;
108 }
109
110 static uint16_t
111 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
112                 uint16_t nb_pkts)
113 {
114         struct bond_dev_private *internals;
115
116         /* Cast to structure containing the bonded device's port id and queue id */
117         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
118
119         internals = bd_rx_q->dev_private;
120
121         return rte_eth_rx_burst(internals->current_primary_port,
122                         bd_rx_q->queue_id, bufs, nb_pkts);
123 }
124
125 static inline uint8_t
126 is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
127 {
128         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
129
130         return !vlan_tci && (ethertype == ether_type_slow_be &&
131                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
132 }
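
/*
 * Illustrative note: the subtype argument is the first byte following the
 * Ethernet header of a slow-protocol frame. The 802.3ad RX path below
 * extracts it like this before calling is_lacp_packets():
 *
 *     hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
 *     subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
 *
 * Only untagged frames (vlan_tci == 0) qualify, since LACP and marker PDUs
 * are never VLAN tagged.
 */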
133
134 static uint16_t
135 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
136                 uint16_t nb_pkts)
137 {
138         /* Cast to structure containing the bonded device's port id and queue id */
139         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
140         struct bond_dev_private *internals = bd_rx_q->dev_private;
141         struct ether_addr bond_mac;
142
143         struct ether_hdr *hdr;
144
145         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
146         uint16_t num_rx_total = 0;      /* Total number of received packets */
147         uint8_t slaves[RTE_MAX_ETHPORTS];
148         uint8_t slave_count;
149
150         uint8_t collecting;  /* current slave collecting status */
151         const uint8_t promisc = internals->promiscuous_en;
152         uint8_t i, j, k;
153         uint8_t subtype;
154
155         rte_eth_macaddr_get(internals->port_id, &bond_mac);
156         /* Copy slave list to protect against slave up/down changes during rx
157          * bursting */
158         slave_count = internals->active_slave_count;
159         memcpy(slaves, internals->active_slaves,
160                         sizeof(internals->active_slaves[0]) * slave_count);
161
162         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
163                 j = num_rx_total;
164                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
165
166                 /* Read packets from this slave */
167                 num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
168                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
169
170                 for (k = j; k < 2 && k < num_rx_total; k++)
171                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
172
173                 /* Handle slow protocol packets. */
174                 while (j < num_rx_total) {
175                         if (j + 3 < num_rx_total)
176                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
177
178                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
179                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
180
181                         /* Remove packet from array if it is a slow packet, or the slave
182                          * is not in collecting state, or the bonding interface is not in
183                          * promiscuous mode and the packet address does not match. */
184                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
185                                 !collecting || (!promisc &&
186                                         !is_multicast_ether_addr(&hdr->d_addr) &&
187                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
188
189                                 if (hdr->ether_type == ether_type_slow_be) {
190                                         bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
191                                                 bufs[j]);
192                                 } else
193                                         rte_pktmbuf_free(bufs[j]);
194
195                                 /* Packet is managed by mode 4 or dropped, shift the array */
196                                 num_rx_total--;
197                                 if (j < num_rx_total) {
198                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
199                                                 (num_rx_total - j));
200                                 }
201                         } else
202                                 j++;
203                 }
204         }
205
206         return num_rx_total;
207 }
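
/*
 * A minimal sketch of the compaction idiom used above, dropping element j
 * from a partially filled array while preserving the order of the rest:
 *
 *     count--;
 *     if (j < count)
 *             memmove(&arr[j], &arr[j + 1], sizeof(arr[0]) * (count - j));
 *
 * Note that j is deliberately not incremented after a removal, so the
 * element shifted into slot j is examined on the next loop iteration.
 */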
208
209 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
210 uint32_t burstnumberRX;
211 uint32_t burstnumberTX;
212
213 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
214
215 static void
216 arp_op_name(uint16_t arp_op, char *buf)
217 {
218         switch (arp_op) {
219         case ARP_OP_REQUEST:
220                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
221                 return;
222         case ARP_OP_REPLY:
223                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
224                 return;
225         case ARP_OP_REVREQUEST:
226                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
227                                 "Reverse ARP Request");
228                 return;
229         case ARP_OP_REVREPLY:
230                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
231                                 "Reverse ARP Reply");
232                 return;
233         case ARP_OP_INVREQUEST:
234                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
235                                 "Peer Identify Request");
236                 return;
237         case ARP_OP_INVREPLY:
238                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
239                                 "Peer Identify Reply");
240                 return;
241         default:
242                 break;
243         }
244         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
245         return;
246 }
247 #endif
248 #define MaxIPv4String   16
249 static void
250 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
251 {
252         uint32_t ipv4_addr;
253
254         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
255         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
256                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
257                 ipv4_addr & 0xFF);
258 }
259
260 #define MAX_CLIENTS_NUMBER      128
261 uint8_t active_clients;
262 struct client_stats_t {
263         uint8_t port;
264         uint32_t ipv4_addr;
265         uint32_t ipv4_rx_packets;
266         uint32_t ipv4_tx_packets;
267 };
268 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
269
270 static void
271 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
272 {
273         int i = 0;
274
275         for (; i < MAX_CLIENTS_NUMBER; i++)     {
276                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
277                         /* Update the RX or TX packet count for this existing client */
278                         if (TXorRXindicator == &burstnumberRX)
279                                 client_stats[i].ipv4_rx_packets++;
280                         else
281                                 client_stats[i].ipv4_tx_packets++;
282                         return;
283                 }
284         }
285         /* We have a new client. Insert it into the table and increment its stats */
286         if (TXorRXindicator == &burstnumberRX)
287                 client_stats[active_clients].ipv4_rx_packets++;
288         else
289                 client_stats[active_clients].ipv4_tx_packets++;
290         client_stats[active_clients].ipv4_addr = addr;
291         client_stats[active_clients].port = port;
292         active_clients++;
293
294 }
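
/*
 * Debug-build-only bookkeeping: the lookup above is a linear scan, and the
 * insert path assumes active_clients never reaches MAX_CLIENTS_NUMBER;
 * there is no overflow check here, which is tolerable for this debug table
 * but would need a bound in a production path.
 */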
295
296 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
297 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
298                 RTE_LOG(DEBUG, PMD, \
299                 "%s " \
300                 "port:%d " \
301                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
302                 "SrcIP:%s " \
303                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
304                 "DstIP:%s " \
305                 "%s " \
306                 "%d\n", \
307                 info, \
308                 port, \
309                 eth_h->s_addr.addr_bytes[0], \
310                 eth_h->s_addr.addr_bytes[1], \
311                 eth_h->s_addr.addr_bytes[2], \
312                 eth_h->s_addr.addr_bytes[3], \
313                 eth_h->s_addr.addr_bytes[4], \
314                 eth_h->s_addr.addr_bytes[5], \
315                 src_ip, \
316                 eth_h->d_addr.addr_bytes[0], \
317                 eth_h->d_addr.addr_bytes[1], \
318                 eth_h->d_addr.addr_bytes[2], \
319                 eth_h->d_addr.addr_bytes[3], \
320                 eth_h->d_addr.addr_bytes[4], \
321                 eth_h->d_addr.addr_bytes[5], \
322                 dst_ip, \
323                 arp_op, \
324                 ++burstnumber)
325 #endif
326
327 static void
328 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
329                 uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
330 {
331         struct ipv4_hdr *ipv4_h;
332 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
333         struct arp_hdr *arp_h;
334         char dst_ip[16];
335         char ArpOp[24];
336         char buf[16];
337 #endif
338         char src_ip[16];
339
340         uint16_t ether_type = eth_h->ether_type;
341         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
342
343 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
344         snprintf(buf, 16, "%s", info);
345 #endif
346
347         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
348                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
349                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
350 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
351                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
352                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
353 #endif
354                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
355         }
356 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
357         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
358                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
359                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
360                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
361                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
362                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
363         }
364 #endif
365 }
366 #endif
367
368 static uint16_t
369 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
370 {
371         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
372         struct bond_dev_private *internals = bd_tx_q->dev_private;
373         struct ether_hdr *eth_h;
374         uint16_t ether_type, offset;
375         uint16_t nb_recv_pkts;
376         int i;
377
378         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
379
380         for (i = 0; i < nb_recv_pkts; i++) {
381                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
382                 ether_type = eth_h->ether_type;
383                 offset = get_vlan_offset(eth_h, &ether_type);
384
385                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
386 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
387                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
388 #endif
389                         bond_mode_alb_arp_recv(eth_h, offset, internals);
390                 }
391 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
392                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
393                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
394 #endif
395         }
396
397         return nb_recv_pkts;
398 }
399
400 static uint16_t
401 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
402                 uint16_t nb_pkts)
403 {
404         struct bond_dev_private *internals;
405         struct bond_tx_queue *bd_tx_q;
406
407         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
408         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
409
410         uint8_t num_of_slaves;
411         uint8_t slaves[RTE_MAX_ETHPORTS];
412
413         uint16_t num_tx_total = 0, num_tx_slave;
414
415         static int slave_idx = 0;
416         int i, cslave_idx = 0, tx_fail_total = 0;
417
418         bd_tx_q = (struct bond_tx_queue *)queue;
419         internals = bd_tx_q->dev_private;
420
421         /* Copy slave list to protect against slave up/down changes during tx
422          * bursting */
423         num_of_slaves = internals->active_slave_count;
424         memcpy(slaves, internals->active_slaves,
425                         sizeof(internals->active_slaves[0]) * num_of_slaves);
426
427         if (num_of_slaves < 1)
428                 return num_tx_total;
429
430         /* Populate the slave mbuf arrays with the packets to be sent on each slave */
431         for (i = 0; i < nb_pkts; i++) {
432                 cslave_idx = (slave_idx + i) % num_of_slaves;
433                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
434         }
435
436         /* increment current slave index so the next call to tx burst starts on the
437          * next slave */
438         slave_idx = ++cslave_idx;
439
440         /* Send packet burst on each slave device */
441         for (i = 0; i < num_of_slaves; i++) {
442                 if (slave_nb_pkts[i] > 0) {
443                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
444                                         slave_bufs[i], slave_nb_pkts[i]);
445
446                         /* if tx burst fails, move unsent packets to the end of bufs */
447                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
448                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
449
450                                 tx_fail_total += tx_fail_slave;
451
452                                 memcpy(&bufs[nb_pkts - tx_fail_total],
453                                                 &slave_bufs[i][num_tx_slave],
454                                                 tx_fail_slave * sizeof(bufs[0]));
455                         }
456                         num_tx_total += num_tx_slave;
457                 }
458         }
459
460         return num_tx_total;
461 }
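
/*
 * Caller-side sketch (illustrative): like rte_eth_tx_burst() itself, the
 * bonded TX paths return the number of packets consumed, and any unsent
 * mbufs are gathered at the tail of bufs, so an application can retry or
 * free them:
 *
 *     uint16_t sent = rte_eth_tx_burst(bond_port, 0, bufs, n);
 *     while (sent < n)
 *             rte_pktmbuf_free(bufs[sent++]);
 *
 * bond_port and n are hypothetical names used only for this example.
 */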
462
463 static uint16_t
464 bond_ethdev_tx_burst_active_backup(void *queue,
465                 struct rte_mbuf **bufs, uint16_t nb_pkts)
466 {
467         struct bond_dev_private *internals;
468         struct bond_tx_queue *bd_tx_q;
469
470         bd_tx_q = (struct bond_tx_queue *)queue;
471         internals = bd_tx_q->dev_private;
472
473         if (internals->active_slave_count < 1)
474                 return 0;
475
476         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
477                         bufs, nb_pkts);
478 }
479
480 static inline uint16_t
481 ether_hash(struct ether_hdr *eth_hdr)
482 {
483         unaligned_uint16_t *word_src_addr =
484                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
485         unaligned_uint16_t *word_dst_addr =
486                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
487
488         return (word_src_addr[0] ^ word_dst_addr[0]) ^
489                         (word_src_addr[1] ^ word_dst_addr[1]) ^
490                         (word_src_addr[2] ^ word_dst_addr[2]);
491 }
492
493 static inline uint32_t
494 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
495 {
496         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
497 }
498
499 static inline uint32_t
500 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
501 {
502         unaligned_uint32_t *word_src_addr =
503                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
504         unaligned_uint32_t *word_dst_addr =
505                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
506
507         return (word_src_addr[0] ^ word_dst_addr[0]) ^
508                         (word_src_addr[1] ^ word_dst_addr[1]) ^
509                         (word_src_addr[2] ^ word_dst_addr[2]) ^
510                         (word_src_addr[3] ^ word_dst_addr[3]);
511 }
512
513 uint16_t
514 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
515 {
516         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
517
518         uint32_t hash = ether_hash(eth_hdr);
519
520         return (hash ^ (hash >> 8)) % slave_count;
521 }
522
523 uint16_t
524 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
525 {
526         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
527         uint16_t proto = eth_hdr->ether_type;
528         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
529         uint32_t hash, l3hash = 0;
530
531         hash = ether_hash(eth_hdr);
532
533         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
534                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
535                                 ((char *)(eth_hdr + 1) + vlan_offset);
536                 l3hash = ipv4_hash(ipv4_hdr);
537
538         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
539                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
540                                 ((char *)(eth_hdr + 1) + vlan_offset);
541                 l3hash = ipv6_hash(ipv6_hdr);
542         }
543
544         hash = hash ^ l3hash;
545         hash ^= hash >> 16;
546         hash ^= hash >> 8;
547
548         return hash % slave_count;
549 }
550
551 uint16_t
552 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
553 {
554         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
555         uint16_t proto = eth_hdr->ether_type;
556         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
557
558         struct udp_hdr *udp_hdr = NULL;
559         struct tcp_hdr *tcp_hdr = NULL;
560         uint32_t hash, l3hash = 0, l4hash = 0;
561
562         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
563                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
564                                 ((char *)(eth_hdr + 1) + vlan_offset);
565                 size_t ip_hdr_offset;
566
567                 l3hash = ipv4_hash(ipv4_hdr);
568
569                 /* there is no L4 header in a fragmented packet */
570                 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
571                         ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
572                                         IPV4_IHL_MULTIPLIER;
573
574                         if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
575                                 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
576                                                 ip_hdr_offset);
577                                 l4hash = HASH_L4_PORTS(tcp_hdr);
578                         } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
579                                 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
580                                                 ip_hdr_offset);
581                                 l4hash = HASH_L4_PORTS(udp_hdr);
582                         }
583                 }
584         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
585                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
586                                 ((char *)(eth_hdr + 1) + vlan_offset);
587                 l3hash = ipv6_hash(ipv6_hdr);
588
589                 if (ipv6_hdr->proto == IPPROTO_TCP) {
590                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
591                         l4hash = HASH_L4_PORTS(tcp_hdr);
592                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
593                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
594                         l4hash = HASH_L4_PORTS(udp_hdr);
595                 }
596         }
597
598         hash = l3hash ^ l4hash;
599         hash ^= hash >> 16;
600         hash ^= hash >> 8;
601
602         return hash % slave_count;
603 }
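
/*
 * Informal example of the three balance policies above: each maps a packet
 * to a slave index in [0, slave_count). Because every field pair is folded
 * with XOR (src ^ dst for MACs, IP addresses and ports), the result is
 * symmetric, so both directions of a flow hash to the same slave:
 *
 *     uint16_t idx = xmit_l34_hash(m, 4);   (hypothetical mbuf "m", idx in 0..3)
 *
 * The active policy is reached through internals->xmit_hash; see
 * bond_ethdev_tx_burst_balance() below.
 */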
604
605 struct bwg_slave {
606         uint64_t bwg_left_int;
607         uint64_t bwg_left_remainder;
608         uint8_t slave;
609 };
610
611 void
612 bond_tlb_activate_slave(struct bond_dev_private *internals) {
613         int i;
614
615         for (i = 0; i < internals->active_slave_count; i++) {
616                 tlb_last_obytets[internals->active_slaves[i]] = 0;
617         }
618 }
619
620 static int
621 bandwidth_cmp(const void *a, const void *b)
622 {
623         const struct bwg_slave *bwg_a = a;
624         const struct bwg_slave *bwg_b = b;
625         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
626         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
627                         (int64_t)bwg_a->bwg_left_remainder;
628         if (diff > 0)
629                 return 1;
630         else if (diff < 0)
631                 return -1;
632         else if (diff2 > 0)
633                 return 1;
634         else if (diff2 < 0)
635                 return -1;
636         else
637                 return 0;
638 }
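
/*
 * Comparator note: qsort() places element a after element b when the
 * comparator returns a positive value, so bandwidth_cmp() sorts the bwg
 * array by descending bandwidth left; tlb_slaves_order[0] therefore ends
 * up being the least loaded slave.
 */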
639
640 static void
641 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
642                 struct bwg_slave *bwg_slave)
643 {
644         struct rte_eth_link link_status;
645
646         rte_eth_link_get(port_id, &link_status);
647         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
648         if (link_bwg == 0)
649                 return;
650         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
651         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
652         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
653 }
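
/*
 * Worked example (assuming link_speed is reported in Mbps): a 10G slave
 * gives link_bwg = 10000 * 1000000 / 8 = 1.25e9 bytes/s; with
 * update_idx == 0 this is scaled to 1.25e10 (the bytes that fit in the
 * 10 ms window, times 1000). Sending at half line rate, load = 6.25e6
 * bytes, so 1000 * load = 6.25e9 and
 *
 *     bwg_left_int       = (1.25e10 - 6.25e9) / 1.25e10 = 0
 *     bwg_left_remainder = (1.25e10 - 6.25e9) % 1.25e10 = 6.25e9
 *
 * i.e. for equal-speed slaves the ordering is effectively decided by the
 * remainder, which grows with the unused share of the link.
 */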
654
655 static void
656 bond_ethdev_update_tlb_slave_cb(void *arg)
657 {
658         struct bond_dev_private *internals = arg;
659         struct rte_eth_stats slave_stats;
660         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
661         uint8_t slave_count;
662         uint64_t tx_bytes;
663
664         uint8_t update_stats = 0;
665         uint8_t i, slave_id;
666
667         internals->slave_update_idx++;
668
669
670         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
671                 update_stats = 1;
672
673         for (i = 0; i < internals->active_slave_count; i++) {
674                 slave_id = internals->active_slaves[i];
675                 rte_eth_stats_get(slave_id, &slave_stats);
676                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
677                 bandwidth_left(slave_id, tx_bytes,
678                                 internals->slave_update_idx, &bwg_array[i]);
679                 bwg_array[i].slave = slave_id;
680
681                 if (update_stats) {
682                         tlb_last_obytets[slave_id] = slave_stats.obytes;
683                 }
684         }
685
686         if (update_stats == 1)
687                 internals->slave_update_idx = 0;
688
689         slave_count = i;
690         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
691         for (i = 0; i < slave_count; i++)
692                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
693
694         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
695                         (struct bond_dev_private *)internals);
696 }
697
698 static uint16_t
699 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
700 {
701         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
702         struct bond_dev_private *internals = bd_tx_q->dev_private;
703
704         struct rte_eth_dev *primary_port =
705                         &rte_eth_devices[internals->primary_port];
706         uint16_t num_tx_total = 0;
707         uint8_t i, j;
708
709         uint8_t num_of_slaves = internals->active_slave_count;
710         uint8_t slaves[RTE_MAX_ETHPORTS];
711
712         struct ether_hdr *ether_hdr;
713         struct ether_addr primary_slave_addr;
714         struct ether_addr active_slave_addr;
715
716         if (num_of_slaves < 1)
717                 return num_tx_total;
718
719         memcpy(slaves, internals->tlb_slaves_order,
720                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
721
722
723         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
724
725         if (nb_pkts > 3) {
726                 for (i = 0; i < 3; i++)
727                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
728         }
729
730         for (i = 0; i < num_of_slaves; i++) {
731                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
732                 for (j = num_tx_total; j < nb_pkts; j++) {
733                         if (j + 3 < nb_pkts)
734                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
735
736                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
737                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
738                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
739 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
740                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
741 #endif
742                 }
743
744                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
745                                 bufs + num_tx_total, nb_pkts - num_tx_total);
746
747                 if (num_tx_total == nb_pkts)
748                         break;
749         }
750
751         return num_tx_total;
752 }
753
754 void
755 bond_tlb_disable(struct bond_dev_private *internals)
756 {
757         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
758 }
759
760 void
761 bond_tlb_enable(struct bond_dev_private *internals)
762 {
763         bond_ethdev_update_tlb_slave_cb(internals);
764 }
765
766 static uint16_t
767 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
768 {
769         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
770         struct bond_dev_private *internals = bd_tx_q->dev_private;
771
772         struct ether_hdr *eth_h;
773         uint16_t ether_type, offset;
774
775         struct client_data *client_info;
776
777         /*
778          * We create transmit buffers for every slave, plus one extra buffer for
779          * the TLB path. In the worst case every packet will be sent on one port.
780          */
781         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
782         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
783
784         /*
785          * We create separate transmit buffers for update packets as they won't be
786          * counted in num_tx_total.
787          */
788         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
789         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
790
791         struct rte_mbuf *upd_pkt;
792         size_t pkt_size;
793
794         uint16_t num_send, num_not_send = 0;
795         uint16_t num_tx_total = 0;
796         uint8_t slave_idx;
797
798         int i, j;
799
800         /* Search tx buffer for ARP packets and forward them to alb */
801         for (i = 0; i < nb_pkts; i++) {
802                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
803                 ether_type = eth_h->ether_type;
804                 offset = get_vlan_offset(eth_h, &ether_type);
805
806                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
807                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
808
809                         /* Change src mac in eth header */
810                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
811
812                         /* Add packet to slave tx buffer */
813                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
814                         slave_bufs_pkts[slave_idx]++;
815                 } else {
816                         /* If packet is not ARP, send it with TLB policy */
817                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
818                                         bufs[i];
819                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
820                 }
821         }
822
823         /* Update connected client ARP tables */
824         if (internals->mode6.ntt) {
825                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
826                         client_info = &internals->mode6.client_table[i];
827
828                         if (client_info->in_use) {
829                                 /* Allocate new packet to send ARP update on current slave */
830                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
831                                 if (upd_pkt == NULL) {
832                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
833                                         continue;
834                                 }
835                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
836                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
837                                 upd_pkt->data_len = pkt_size;
838                                 upd_pkt->pkt_len = pkt_size;
839
840                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
841                                                 internals);
842
843                                 /* Add packet to update tx buffer */
844                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
845                                 update_bufs_pkts[slave_idx]++;
846                         }
847                 }
848                 internals->mode6.ntt = 0;
849         }
850
851         /* Send ARP packets on proper slaves */
852         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
853                 if (slave_bufs_pkts[i] > 0) {
854                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
855                                         slave_bufs[i], slave_bufs_pkts[i]);
856                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
857                                 bufs[nb_pkts - 1 - num_not_send - j] =
858                                                 slave_bufs[i][nb_pkts - 1 - j];
859                         }
860
861                         num_tx_total += num_send;
862                         num_not_send += slave_bufs_pkts[i] - num_send;
863
864 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
865                         /* Print TX stats including update packets */
866                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
867                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
868                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
869                         }
870 #endif
871                 }
872         }
873
874         /* Send update packets on proper slaves */
875         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
876                 if (update_bufs_pkts[i] > 0) {
877                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
878                                         update_bufs_pkts[i]);
879                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
880                                 rte_pktmbuf_free(update_bufs[i][j]);
881                         }
882 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
883                         for (j = 0; j < update_bufs_pkts[i]; j++) {
884                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
885                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
886                         }
887 #endif
888                 }
889         }
890
891         /* Send non-ARP packets using tlb policy */
892         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
893                 num_send = bond_ethdev_tx_burst_tlb(queue,
894                                 slave_bufs[RTE_MAX_ETHPORTS],
895                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
896
897                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
898                         bufs[nb_pkts - 1 - num_not_send - j] =
899                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
900                 }
901
902                 num_tx_total += num_send;
903         }
904
905         return num_tx_total;
906 }
907
908 static uint16_t
909 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
910                 uint16_t nb_pkts)
911 {
912         struct bond_dev_private *internals;
913         struct bond_tx_queue *bd_tx_q;
914
915         uint8_t num_of_slaves;
916         uint8_t slaves[RTE_MAX_ETHPORTS];
917
918         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
919
920         int i, op_slave_id;
921
922         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
923         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
924
925         bd_tx_q = (struct bond_tx_queue *)queue;
926         internals = bd_tx_q->dev_private;
927
928         /* Copy slave list to protect against slave up/down changes during tx
929          * bursting */
930         num_of_slaves = internals->active_slave_count;
931         memcpy(slaves, internals->active_slaves,
932                         sizeof(internals->active_slaves[0]) * num_of_slaves);
933
934         if (num_of_slaves < 1)
935                 return num_tx_total;
936
937         /* Populate the slave mbuf arrays with the packets to be sent on each slave */
938         for (i = 0; i < nb_pkts; i++) {
939                 /* Select output slave using hash based on xmit policy */
940                 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
941
942                 /* Populate slave mbuf arrays with mbufs for that slave */
943                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
944         }
945
946         /* Send packet burst on each slave device */
947         for (i = 0; i < num_of_slaves; i++) {
948                 if (slave_nb_pkts[i] > 0) {
949                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
950                                         slave_bufs[i], slave_nb_pkts[i]);
951
952                         /* if tx burst fails, move unsent packets to the end of bufs */
953                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
954                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
955
956                                 tx_fail_total += slave_tx_fail_count;
957                                 memcpy(&bufs[nb_pkts - tx_fail_total],
958                                                 &slave_bufs[i][num_tx_slave],
959                                                 slave_tx_fail_count * sizeof(bufs[0]));
960                         }
961
962                         num_tx_total += num_tx_slave;
963                 }
964         }
965
966         return num_tx_total;
967 }
968
969 static uint16_t
970 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
971                 uint16_t nb_pkts)
972 {
973         struct bond_dev_private *internals;
974         struct bond_tx_queue *bd_tx_q;
975
976         uint8_t num_of_slaves;
977         uint8_t slaves[RTE_MAX_ETHPORTS];
978         /* positions in the slaves array, not port IDs */
979         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
980         uint8_t distributing_count;
981
982         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
983         uint16_t i, j, op_slave_idx;
984         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
985
986         /* Allocate extra slots for the slow-protocol packets added in 802.3AD mode. */
987         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
988         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
989
990         /* Total amount of packets in slave_bufs */
991         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
992         /* Slow packets placed in each slave */
993         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
994
995         bd_tx_q = (struct bond_tx_queue *)queue;
996         internals = bd_tx_q->dev_private;
997
998         /* Copy slave list to protect against slave up/down changes during tx
999          * bursting */
1000         num_of_slaves = internals->active_slave_count;
1001         if (num_of_slaves < 1)
1002                 return num_tx_total;
1003
1004         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1005
1006         distributing_count = 0;
1007         for (i = 0; i < num_of_slaves; i++) {
1008                 struct port *port = &mode_8023ad_ports[slaves[i]];
1009
1010                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1011                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
1012                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1013
1014                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1015                         slave_bufs[i][j] = slow_pkts[j];
1016
1017                 if (ACTOR_STATE(port, DISTRIBUTING))
1018                         distributing_offsets[distributing_count++] = i;
1019         }
1020
1021         if (likely(distributing_count > 0)) {
1022                 /* Populate the slave mbuf arrays with the packets to be sent on each slave */
1023                 for (i = 0; i < nb_pkts; i++) {
1024                         /* Select output slave using hash based on xmit policy */
1025                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1026
1027                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
1028                          * slaves that are currently distributing. */
1029                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
1030                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1031                         slave_nb_pkts[slave_offset]++;
1032                 }
1033         }
1034
1035         /* Send packet burst on each slave device */
1036         for (i = 0; i < num_of_slaves; i++) {
1037                 if (slave_nb_pkts[i] == 0)
1038                         continue;
1039
1040                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1041                                 slave_bufs[i], slave_nb_pkts[i]);
1042
1043                 /* If tx burst fails drop slow packets */
1044                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1045                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1046
1047                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1048                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1049
1050                 /* If tx burst fails, move unsent packets to the end of bufs */
1051                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1052                         uint16_t j = nb_pkts - num_tx_fail_total;
1053                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1054                                 bufs[j] = slave_bufs[i][num_tx_slave];
1055                 }
1056         }
1057
1058         return num_tx_total;
1059 }
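
/*
 * Accounting note: slow-protocol frames dequeued from each slave's tx_ring
 * are prepended to that slave's burst but never counted in num_tx_total,
 * which reports only the caller's data packets; slow packets that a slave
 * fails to send are freed above instead of being handed back to the
 * caller.
 */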
1060
1061 static uint16_t
1062 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1063                 uint16_t nb_pkts)
1064 {
1065         struct bond_dev_private *internals;
1066         struct bond_tx_queue *bd_tx_q;
1067
1068         uint8_t tx_failed_flag = 0, num_of_slaves;
1069         uint8_t slaves[RTE_MAX_ETHPORTS];
1070
1071         uint16_t max_nb_of_tx_pkts = 0;
1072
1073         int slave_tx_total[RTE_MAX_ETHPORTS];
1074         int i, most_successful_tx_slave = -1;
1075
1076         bd_tx_q = (struct bond_tx_queue *)queue;
1077         internals = bd_tx_q->dev_private;
1078
1079         /* Copy slave list to protect against slave up/down changes during tx
1080          * bursting */
1081         num_of_slaves = internals->active_slave_count;
1082         memcpy(slaves, internals->active_slaves,
1083                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1084
1085         if (num_of_slaves < 1)
1086                 return 0;
1087
1088         /* Increment reference count on mbufs */
1089         for (i = 0; i < nb_pkts; i++)
1090                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1091
1092         /* Transmit burst on each active slave */
1093         for (i = 0; i < num_of_slaves; i++) {
1094                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1095                                         bufs, nb_pkts);
1096
1097                 if (unlikely(slave_tx_total[i] < nb_pkts))
1098                         tx_failed_flag = 1;
1099
1100                 /* record the value and slave index for the slave which transmits the
1101                  * maximum number of packets */
1102                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1103                         max_nb_of_tx_pkts = slave_tx_total[i];
1104                         most_successful_tx_slave = i;
1105                 }
1106         }
1107
1108         /* If slaves fail to transmit packets from the burst, the calling
1109          * application is not expected to know about multiple references to the
1110          * packets, so we must handle failures of all packets except those of
1111          * the most successful slave */
1112         if (unlikely(tx_failed_flag))
1113                 for (i = 0; i < num_of_slaves; i++)
1114                         if (i != most_successful_tx_slave)
1115                                 while (slave_tx_total[i] < nb_pkts)
1116                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1117
1118         return max_nb_of_tx_pkts;
1119 }
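
/*
 * Note on the refcount handling above: transmitting one mbuf on N slaves
 * needs N owners, because each successful TX eventually frees the mbuf
 * once; hence rte_mbuf_refcnt_update(bufs[i], N - 1) before the loop. The
 * free loop at the end releases the references still held for packets that
 * the less successful slaves failed to send.
 */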
1120
1121 void
1122 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1123                 struct rte_eth_link *slave_dev_link)
1124 {
1125         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1126         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1127
1128         if (slave_dev_link->link_status &&
1129                 bonded_eth_dev->data->dev_started) {
1130                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1131                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
1132
1133                 internals->link_props_set = 1;
1134         }
1135 }
1136
1137 void
1138 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1139 {
1140         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1141
1142         memset(&(bonded_eth_dev->data->dev_link), 0,
1143                         sizeof(bonded_eth_dev->data->dev_link));
1144
1145         internals->link_props_set = 0;
1146 }
1147
1148 int
1149 link_properties_valid(struct rte_eth_link *bonded_dev_link,
1150                 struct rte_eth_link *slave_dev_link)
1151 {
1152         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1153                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
1154                 return -1;
1155
1156         return 0;
1157 }
1158
1159 int
1160 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1161 {
1162         struct ether_addr *mac_addr;
1163
1164         if (eth_dev == NULL) {
1165                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1166                 return -1;
1167         }
1168
1169         if (dst_mac_addr == NULL) {
1170                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1171                 return -1;
1172         }
1173
1174         mac_addr = eth_dev->data->mac_addrs;
1175
1176         ether_addr_copy(mac_addr, dst_mac_addr);
1177         return 0;
1178 }
1179
1180 int
1181 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1182 {
1183         struct ether_addr *mac_addr;
1184
1185         if (eth_dev == NULL) {
1186                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1187                 return -1;
1188         }
1189
1190         if (new_mac_addr == NULL) {
1191                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1192                 return -1;
1193         }
1194
1195         mac_addr = eth_dev->data->mac_addrs;
1196
1197         /* If the new MAC is different from the current MAC then update it */
1198         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1199                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1200
1201         return 0;
1202 }
1203
1204 int
1205 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1206 {
1207         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1208         int i;
1209
1210         /* Update slave devices MAC addresses */
1211         if (internals->slave_count < 1)
1212                 return -1;
1213
1214         switch (internals->mode) {
1215         case BONDING_MODE_ROUND_ROBIN:
1216         case BONDING_MODE_BALANCE:
1217         case BONDING_MODE_BROADCAST:
1218                 for (i = 0; i < internals->slave_count; i++) {
1219                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1220                                         bonded_eth_dev->data->mac_addrs)) {
1221                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1222                                                 internals->slaves[i].port_id);
1223                                 return -1;
1224                         }
1225                 }
1226                 break;
1227         case BONDING_MODE_8023AD:
1228                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1229                 break;
1230         case BONDING_MODE_ACTIVE_BACKUP:
1231         case BONDING_MODE_TLB:
1232         case BONDING_MODE_ALB:
1233         default:
1234                 for (i = 0; i < internals->slave_count; i++) {
1235                         if (internals->slaves[i].port_id ==
1236                                         internals->current_primary_port) {
1237                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1238                                                 bonded_eth_dev->data->mac_addrs)) {
1239                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1240                                                         internals->current_primary_port);
1241                                         return -1;
1242                                 }
1243                         } else {
1244                                 if (mac_address_set(
1245                                                 &rte_eth_devices[internals->slaves[i].port_id],
1246                                                 &internals->slaves[i].persisted_mac_addr)) {
1247                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1248                                                         internals->slaves[i].port_id);
1249                                         return -1;
1250                                 }
1251                         }
1252                 }
1253         }
1254
1255         return 0;
1256 }
1257
1258 int
1259 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1260 {
1261         struct bond_dev_private *internals;
1262
1263         internals = eth_dev->data->dev_private;
1264
1265         switch (mode) {
1266         case BONDING_MODE_ROUND_ROBIN:
1267                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1268                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1269                 break;
1270         case BONDING_MODE_ACTIVE_BACKUP:
1271                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1272                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1273                 break;
1274         case BONDING_MODE_BALANCE:
1275                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1276                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1277                 break;
1278         case BONDING_MODE_BROADCAST:
1279                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1280                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1281                 break;
1282         case BONDING_MODE_8023AD:
1283                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1284                         return -1;
1285
1286                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1287                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1288                 RTE_LOG(WARNING, PMD,
1289                                 "Using mode 4, it is necessary to do TX burst and RX burst "
1290                                 "at least every 100ms.\n");
1291                 break;
1292         case BONDING_MODE_TLB:
1293                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1294                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1295                 break;
1296         case BONDING_MODE_ALB:
1297                 if (bond_mode_alb_enable(eth_dev) != 0)
1298                         return -1;
1299
1300                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1301                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1302                 break;
1303         default:
1304                 return -1;
1305         }
1306
1307         internals->mode = mode;
1308
1309         return 0;
1310 }
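
/*
 * Usage sketch (illustrative): applications normally reach this through
 * the public API rather than calling it directly, e.g.
 *
 *     if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_BALANCE) != 0)
 *             rte_exit(EXIT_FAILURE, "cannot set bonding mode\n");
 *
 * which swaps the rx_pkt_burst/tx_pkt_burst function pointers as above.
 * bond_port_id is a hypothetical variable used only for this example.
 */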
1311
1312 int
1313 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1314                 struct rte_eth_dev *slave_eth_dev)
1315 {
1316         struct bond_rx_queue *bd_rx_q;
1317         struct bond_tx_queue *bd_tx_q;
1318
1319         int errval;
1320         uint16_t q_id;
1321
1322         /* Stop slave */
1323         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1324
1325         /* Enable interrupts on slave device if supported */
1326         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1327                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1328
1329         /* If RSS is enabled for bonding, try to enable it for slaves  */
1330         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1331                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1332                                 != 0) {
1333                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1334                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1335                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1336                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1337                 } else {
1338                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1339                 }
1340
1341                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1342                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1343                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1344                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1345         }
1346
1347         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1348                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1349
1350         /* Configure device */
1351         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1352                         bonded_eth_dev->data->nb_rx_queues,
1353                         bonded_eth_dev->data->nb_tx_queues,
1354                         &(slave_eth_dev->data->dev_conf));
1355         if (errval != 0) {
1356                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1357                                 slave_eth_dev->data->port_id, errval);
1358                 return errval;
1359         }
1360
1361         /* Setup Rx Queues */
1362         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1363                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1364
1365                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1366                                 bd_rx_q->nb_rx_desc,
1367                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1368                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1369                 if (errval != 0) {
1370                         RTE_BOND_LOG(ERR,
1371                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1372                                         slave_eth_dev->data->port_id, q_id, errval);
1373                         return errval;
1374                 }
1375         }
1376
1377         /* Setup Tx Queues */
1378         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1379                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1380
1381                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1382                                 bd_tx_q->nb_tx_desc,
1383                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1384                                 &bd_tx_q->tx_conf);
1385                 if (errval != 0) {
1386                         RTE_BOND_LOG(ERR,
1387                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1388                                         slave_eth_dev->data->port_id, q_id, errval);
1389                         return errval;
1390                 }
1391         }
1392
1393         /* Start device */
1394         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1395         if (errval != 0) {
1396                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1397                                 slave_eth_dev->data->port_id, errval);
1398                 return -1;
1399         }
1400
1401         /* If RSS is enabled for bonding, synchronize RETA */
1402         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1403                 int i;
1404                 struct bond_dev_private *internals;
1405
1406                 internals = bonded_eth_dev->data->dev_private;
1407
1408                 for (i = 0; i < internals->slave_count; i++) {
1409                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1410                                 errval = rte_eth_dev_rss_reta_update(
1411                                                 slave_eth_dev->data->port_id,
1412                                                 &internals->reta_conf[0],
1413                                                 internals->slaves[i].reta_size);
1414                                 if (errval != 0) {
1415                                         RTE_LOG(WARNING, PMD,
1416                                                         "rte_eth_dev_rss_reta_update on slave port %d failed (err %d)."
1417                                                         " RSS configuration for bonding may be inconsistent.\n",
1418                                                         slave_eth_dev->data->port_id, errval);
1419                                 }
1420                                 break;
1421                         }
1422                 }
1423         }
1424
1425         /* If lsc interrupt is set, check initial slave's link status */
1426         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1427                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1428                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1429
1430         return 0;
1431 }
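
/*
 * Illustrative sketch (not part of the driver): the standard ethdev
 * bring-up sequence that slave_configure() mirrors for each slave --
 * configure, set up Rx/Tx queues, then start. The descriptor counts and
 * mempool are hypothetical placeholders.
 */
#if 0
static int
example_port_bring_up(uint8_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_conf port_conf;

        memset(&port_conf, 0, sizeof(port_conf));

        if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) != 0)
                return -1;

        if (rte_eth_rx_queue_setup(port_id, 0, 128,
                        rte_eth_dev_socket_id(port_id), NULL, mb_pool) != 0)
                return -1;

        if (rte_eth_tx_queue_setup(port_id, 0, 512,
                        rte_eth_dev_socket_id(port_id), NULL) != 0)
                return -1;

        return rte_eth_dev_start(port_id);
}
#endif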
1432
1433 void
1434 slave_remove(struct bond_dev_private *internals,
1435                 struct rte_eth_dev *slave_eth_dev)
1436 {
1437         uint8_t i;
1438
1439         for (i = 0; i < internals->slave_count; i++)
1440                 if (internals->slaves[i].port_id ==
1441                                 slave_eth_dev->data->port_id)
1442                         break;
1443
1444         if (i < (internals->slave_count - 1))
1445                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1446                                 sizeof(internals->slaves[0]) *
1447                                 (internals->slave_count - i - 1));
1448
1449         internals->slave_count--;
1450
1451         /* force reconfiguration of slave interfaces */
1452         _rte_eth_dev_reset(slave_eth_dev);
1453 }
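
/*
 * Illustrative sketch (not part of the driver): the compact-in-place
 * pattern slave_remove() uses -- one memmove() shifts the array tail left
 * over slot i, then the element count shrinks. Types are hypothetical.
 */
#if 0
static void
example_array_remove(uint8_t *arr, uint8_t *count, uint8_t i)
{
        if (i < (uint8_t)(*count - 1))
                memmove(&arr[i], &arr[i + 1],
                                sizeof(arr[0]) * (*count - i - 1));

        (*count)--;
}
#endif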
1454
1455 static void
1456 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1457
1458 void
1459 slave_add(struct bond_dev_private *internals,
1460                 struct rte_eth_dev *slave_eth_dev)
1461 {
1462         struct bond_slave_details *slave_details =
1463                         &internals->slaves[internals->slave_count];
1464
1465         slave_details->port_id = slave_eth_dev->data->port_id;
1466         slave_details->last_link_status = 0;
1467
1468         /* Mark slave devices that don't support interrupts so we can
1469          * compensate when we start the bond
1470          */
1471         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1472                 slave_details->link_status_poll_enabled = 1;
1473         }
1474
1475         slave_details->link_status_wait_to_complete = 0;
1476         /* save the slave's MAC address so it can be restored on removal */
1477         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1478                         sizeof(struct ether_addr));
1479 }
1480
1481 void
1482 bond_ethdev_primary_set(struct bond_dev_private *internals,
1483                 uint8_t slave_port_id)
1484 {
1485         int i;
1486
1487         if (internals->active_slave_count < 1)
1488                 internals->current_primary_port = slave_port_id;
1489         else
1490                 /* Search bonded device slave ports for new proposed primary port */
1491                 for (i = 0; i < internals->active_slave_count; i++) {
1492                         if (internals->active_slaves[i] == slave_port_id)
1493                                 internals->current_primary_port = slave_port_id;
1494                 }
1495 }
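
/*
 * Illustrative sketch (not part of the driver): applications normally
 * select the primary slave through the public API, which validates the
 * port before ending up in bond_ethdev_primary_set() above. Port ids are
 * hypothetical.
 */
#if 0
static void
example_choose_primary(uint8_t bonded_port_id, uint8_t slave_port_id)
{
        if (rte_eth_bond_primary_set(bonded_port_id, slave_port_id) != 0)
                RTE_BOND_LOG(ERR, "failed to set primary slave %u",
                                slave_port_id);
}
#endif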
1496
1497 static void
1498 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1499
1500 static int
1501 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1502 {
1503         struct bond_dev_private *internals;
1504         int i;
1505
1506         /* slave eth dev will be started by bonded device */
1507         if (check_for_bonded_ethdev(eth_dev)) {
1508                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1509                                 eth_dev->data->port_id);
1510                 return -1;
1511         }
1512
1513         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1514         eth_dev->data->dev_started = 1;
1515
1516         internals = eth_dev->data->dev_private;
1517
1518         if (internals->slave_count == 0) {
1519                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1520                 return -1;
1521         }
1522
1523         if (internals->user_defined_mac == 0) {
1524                 struct ether_addr *new_mac_addr = NULL;
1525
1526                 for (i = 0; i < internals->slave_count; i++)
1527                         if (internals->slaves[i].port_id == internals->primary_port)
1528                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1529
1530                 if (new_mac_addr == NULL)
1531                         return -1;
1532
1533                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1534                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1535                                         eth_dev->data->port_id);
1536                         return -1;
1537                 }
1538         }
1539
1540         /* Update all slave devices' MACs */
1541         if (mac_address_slaves_update(eth_dev) != 0)
1542                 return -1;
1543
1544         /* If bonded device is configured in promiscuous mode then re-apply config */
1545         if (internals->promiscuous_en)
1546                 bond_ethdev_promiscuous_enable(eth_dev);
1547
1548         /* Reconfigure each slave device if starting bonded device */
1549         for (i = 0; i < internals->slave_count; i++) {
1550                 if (slave_configure(eth_dev,
1551                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1552                         RTE_BOND_LOG(ERR,
1553                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1554                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1555                         return -1;
1556                 }
1557                 /* We will need to poll for link status if any slave doesn't
1558                  * support interrupts
1559                  */
1560                 if (internals->slaves[i].link_status_poll_enabled)
1561                         internals->link_status_polling_enabled = 1;
1562         }
1563         /* start polling if needed */
1564         if (internals->link_status_polling_enabled) {
1565                 rte_eal_alarm_set(
1566                         internals->link_status_polling_interval_ms * 1000,
1567                         bond_ethdev_slave_link_status_change_monitor,
1568                         (void *)&rte_eth_devices[internals->port_id]);
1569         }
1570
1571         if (internals->user_defined_primary_port)
1572                 bond_ethdev_primary_set(internals, internals->primary_port);
1573
1574         if (internals->mode == BONDING_MODE_8023AD)
1575                 bond_mode_8023ad_start(eth_dev);
1576
1577         if (internals->mode == BONDING_MODE_TLB ||
1578                         internals->mode == BONDING_MODE_ALB)
1579                 bond_tlb_enable(internals);
1580
1581         return 0;
1582 }
1583
1584 static void
1585 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1586 {
1587         uint8_t i;
1588
1589         if (dev->data->rx_queues != NULL) {
1590                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1591                         rte_free(dev->data->rx_queues[i]);
1592                         dev->data->rx_queues[i] = NULL;
1593                 }
1594                 dev->data->nb_rx_queues = 0;
1595         }
1596
1597         if (dev->data->tx_queues != NULL) {
1598                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1599                         rte_free(dev->data->tx_queues[i]);
1600                         dev->data->tx_queues[i] = NULL;
1601                 }
1602                 dev->data->nb_tx_queues = 0;
1603         }
1604 }
1605
1606 void
1607 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1608 {
1609         struct bond_dev_private *internals = eth_dev->data->dev_private;
1610         uint8_t i;
1611
1612         if (internals->mode == BONDING_MODE_8023AD) {
1613                 struct port *port;
1614                 void *pkt = NULL;
1615
1616                 bond_mode_8023ad_stop(eth_dev);
1617
1618                 /* Discard all messages to/from mode 4 state machines */
1619                 for (i = 0; i < internals->active_slave_count; i++) {
1620                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1621
1622                         RTE_ASSERT(port->rx_ring != NULL);
1623                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1624                                 rte_pktmbuf_free(pkt);
1625
1626                         RTE_ASSERT(port->tx_ring != NULL);
1627                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1628                                 rte_pktmbuf_free(pkt);
1629                 }
1630         }
1631
1632         if (internals->mode == BONDING_MODE_TLB ||
1633                         internals->mode == BONDING_MODE_ALB) {
1634                 bond_tlb_disable(internals);
1635                 for (i = 0; i < internals->active_slave_count; i++)
1636                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1637         }
1638
1639         internals->active_slave_count = 0;
1640         internals->link_status_polling_enabled = 0;
1641         for (i = 0; i < internals->slave_count; i++)
1642                 internals->slaves[i].last_link_status = 0;
1643
1644         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1645         eth_dev->data->dev_started = 0;
1646 }
1647
1648 void
1649 bond_ethdev_close(struct rte_eth_dev *dev)
1650 {
1651         struct bond_dev_private *internals = dev->data->dev_private;
1652
1653         bond_ethdev_free_queues(dev);
1654         rte_bitmap_reset(internals->vlan_filter_bmp);
1655 }
1656
1657 /* forward declaration */
1658 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1659
1660 static void
1661 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1662 {
1663         struct bond_dev_private *internals = dev->data->dev_private;
1664
1665         dev_info->max_mac_addrs = 1;
1666
1667         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
1668                                   internals->candidate_max_rx_pktlen : 2048;
1669
1670         dev_info->max_rx_queues = (uint16_t)128;
1671         dev_info->max_tx_queues = (uint16_t)512;
1672
1673         dev_info->min_rx_bufsize = 0;
1674
1675         dev_info->rx_offload_capa = internals->rx_offload_capa;
1676         dev_info->tx_offload_capa = internals->tx_offload_capa;
1677         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1678
1679         dev_info->reta_size = internals->reta_size;
1680 }
1681
1682 static int
1683 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1684 {
1685         int res;
1686         uint8_t i;
1687         struct bond_dev_private *internals = dev->data->dev_private;
1688
1689         /* don't do this while a slave is being added */
1690         rte_spinlock_lock(&internals->lock);
1691
1692         if (on)
1693                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
1694         else
1695                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
1696
1697         for (i = 0; i < internals->slave_count; i++) {
1698                 uint8_t port_id = internals->slaves[i].port_id;
1699
1700                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
1701                 if (res == -ENOTSUP)
1702                         RTE_LOG(WARNING, PMD,
1703                                 "Setting VLAN filter on slave port %u not supported.\n",
1704                                 port_id);
1705         }
1706
1707         rte_spinlock_unlock(&internals->lock);
1708         return 0;
1709 }
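
/*
 * Illustrative sketch (not part of the driver): adding a VLAN filter on
 * the bonded port, which the handler above records in the bitmap and fans
 * out to every slave. The VLAN id is a hypothetical example value.
 */
#if 0
static void
example_add_vlan_filter(uint8_t bonded_port_id)
{
        /* 'on' != 0 adds the filter; 0 removes it again */
        if (rte_eth_dev_vlan_filter(bonded_port_id, 100, 1) != 0)
                RTE_LOG(WARNING, PMD, "could not add VLAN 100 filter\n");
}
#endif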
1710
1711 static int
1712 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1713                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1714                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1715 {
1716         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1717                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1718                                         0, dev->data->numa_node);
1719         if (bd_rx_q == NULL)
1720                 return -1;
1721
1722         bd_rx_q->queue_id = rx_queue_id;
1723         bd_rx_q->dev_private = dev->data->dev_private;
1724
1725         bd_rx_q->nb_rx_desc = nb_rx_desc;
1726
1727         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1728         bd_rx_q->mb_pool = mb_pool;
1729
1730         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1731
1732         return 0;
1733 }
1734
1735 static int
1736 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1737                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1738                 const struct rte_eth_txconf *tx_conf)
1739 {
1740         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1741                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1742                                         0, dev->data->numa_node);
1743
1744         if (bd_tx_q == NULL)
1745                 return -1;
1746
1747         bd_tx_q->queue_id = tx_queue_id;
1748         bd_tx_q->dev_private = dev->data->dev_private;
1749
1750         bd_tx_q->nb_tx_desc = nb_tx_desc;
1751         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1752
1753         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1754
1755         return 0;
1756 }
1757
1758 static void
1759 bond_ethdev_rx_queue_release(void *queue)
1760 {
1761         if (queue == NULL)
1762                 return;
1763
1764         rte_free(queue);
1765 }
1766
1767 static void
1768 bond_ethdev_tx_queue_release(void *queue)
1769 {
1770         if (queue == NULL)
1771                 return;
1772
1773         rte_free(queue);
1774 }
1775
1776 static void
1777 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1778 {
1779         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1780         struct bond_dev_private *internals;
1781
1782         /* Default value for polling slave found is true as we don't want to
1783          * disable the polling thread if we cannot get the lock */
1784         int i, polling_slave_found = 1;
1785
1786         if (cb_arg == NULL)
1787                 return;
1788
1789         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1790         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1791
1792         if (!bonded_ethdev->data->dev_started ||
1793                 !internals->link_status_polling_enabled)
1794                 return;
1795
1796         /* If device is currently being configured then don't check slaves'
1797          * link status, wait until next period */
1798         if (rte_spinlock_trylock(&internals->lock)) {
1799                 if (internals->slave_count > 0)
1800                         polling_slave_found = 0;
1801
1802                 for (i = 0; i < internals->slave_count; i++) {
1803                         if (!internals->slaves[i].link_status_poll_enabled)
1804                                 continue;
1805
1806                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1807                         polling_slave_found = 1;
1808
1809                         /* Update slave link status */
1810                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1811                                         internals->slaves[i].link_status_wait_to_complete);
1812
1813                         /* if link status has changed since last checked then call lsc
1814                          * event callback */
1815                         if (slave_ethdev->data->dev_link.link_status !=
1816                                         internals->slaves[i].last_link_status) {
1817                                 internals->slaves[i].last_link_status =
1818                                                 slave_ethdev->data->dev_link.link_status;
1819
1820                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1821                                                 RTE_ETH_EVENT_INTR_LSC,
1822                                                 &bonded_ethdev->data->port_id);
1823                         }
1824                 }
1825                 rte_spinlock_unlock(&internals->lock);
1826         }
1827
1828         if (polling_slave_found)
1829                 /* Set alarm to continue monitoring link status of slave ethdevs */
1830                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1831                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1832 }
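
/*
 * Illustrative sketch (not part of the driver): the self-re-arming alarm
 * pattern the monitor above relies on -- the callback does its work, then
 * re-registers itself until a stop condition holds. The period and stop
 * flag are hypothetical.
 */
#if 0
#define EXAMPLE_PERIOD_US (100 * 1000)

static volatile int example_stop;

static void
example_periodic_cb(void *arg)
{
        /* ... poll link status or other state here ... */

        if (!example_stop)
                rte_eal_alarm_set(EXAMPLE_PERIOD_US, example_periodic_cb, arg);
}
#endif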
1833
1834 static int
1835 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1836                 int wait_to_complete)
1837 {
1838         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1839
1840         if (!bonded_eth_dev->data->dev_started ||
1841                 internals->active_slave_count == 0) {
1842                 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1843                 return 0;
1844         } else {
1845                 struct rte_eth_dev *slave_eth_dev;
1846                 int i, link_up = 0;
1847
1848                 for (i = 0; i < internals->active_slave_count; i++) {
1849                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1850
1851                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1852                                         wait_to_complete);
1853                         if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
1854                                 link_up = 1;
1855                                 break;
1856                         }
1857                 }
1858
1859                 bonded_eth_dev->data->dev_link.link_status = link_up;
1860         }
1861
1862         return 0;
1863 }
1864
1865 static void
1866 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1867 {
1868         struct bond_dev_private *internals = dev->data->dev_private;
1869         struct rte_eth_stats slave_stats;
1870         int i, j;
1871
1872         for (i = 0; i < internals->slave_count; i++) {
1873                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1874
1875                 stats->ipackets += slave_stats.ipackets;
1876                 stats->opackets += slave_stats.opackets;
1877                 stats->ibytes += slave_stats.ibytes;
1878                 stats->obytes += slave_stats.obytes;
1879                 stats->imissed += slave_stats.imissed;
1880                 stats->ierrors += slave_stats.ierrors;
1881                 stats->oerrors += slave_stats.oerrors;
1882                 stats->rx_nombuf += slave_stats.rx_nombuf;
1883
1884                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1885                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1886                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1887                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1888                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1889                         stats->q_errors[j] += slave_stats.q_errors[j];
1890                 }
1891
1892         }
1893 }
1894
1895 static void
1896 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1897 {
1898         struct bond_dev_private *internals = dev->data->dev_private;
1899         int i;
1900
1901         for (i = 0; i < internals->slave_count; i++)
1902                 rte_eth_stats_reset(internals->slaves[i].port_id);
1903 }
1904
1905 static void
1906 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1907 {
1908         struct bond_dev_private *internals = eth_dev->data->dev_private;
1909         int i;
1910
1911         internals->promiscuous_en = 1;
1912
1913         switch (internals->mode) {
1914         /* Promiscuous mode is propagated to all slaves */
1915         case BONDING_MODE_ROUND_ROBIN:
1916         case BONDING_MODE_BALANCE:
1917         case BONDING_MODE_BROADCAST:
1918                 for (i = 0; i < internals->slave_count; i++)
1919                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1920                 break;
1921         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1922         case BONDING_MODE_8023AD:
1923                 break;
1924         /* Promiscuous mode is propagated only to primary slave */
1925         case BONDING_MODE_ACTIVE_BACKUP:
1926         case BONDING_MODE_TLB:
1927         case BONDING_MODE_ALB:
1928         default:
1929                 rte_eth_promiscuous_enable(internals->current_primary_port);
1930         }
1931 }
1932
1933 static void
1934 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1935 {
1936         struct bond_dev_private *internals = dev->data->dev_private;
1937         int i;
1938
1939         internals->promiscuous_en = 0;
1940
1941         switch (internals->mode) {
1942         /* Promiscuous mode is propagated to all slaves */
1943         case BONDING_MODE_ROUND_ROBIN:
1944         case BONDING_MODE_BALANCE:
1945         case BONDING_MODE_BROADCAST:
1946                 for (i = 0; i < internals->slave_count; i++)
1947                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1948                 break;
1949         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1950         case BONDING_MODE_8023AD:
1951                 break;
1952         /* Promiscuous mode is propagated only to primary slave */
1953         case BONDING_MODE_ACTIVE_BACKUP:
1954         case BONDING_MODE_TLB:
1955         case BONDING_MODE_ALB:
1956         default:
1957                 rte_eth_promiscuous_disable(internals->current_primary_port);
1958         }
1959 }
1960
1961 static void
1962 bond_ethdev_delayed_lsc_propagation(void *arg)
1963 {
1964         if (arg == NULL)
1965                 return;
1966
1967         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1968                         RTE_ETH_EVENT_INTR_LSC, NULL);
1969 }
1970
1971 void
1972 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1973                 void *param)
1974 {
1975         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1976         struct bond_dev_private *internals;
1977         struct rte_eth_link link;
1978
1979         int i, valid_slave = 0;
1980         uint8_t active_pos;
1981         uint8_t lsc_flag = 0;
1982
1983         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1984                 return;
1985
1986         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1987         slave_eth_dev = &rte_eth_devices[port_id];
1988
1989         if (check_for_bonded_ethdev(bonded_eth_dev))
1990                 return;
1991
1992         internals = bonded_eth_dev->data->dev_private;
1993
1994         /* If the device isn't started don't handle interrupts */
1995         if (!bonded_eth_dev->data->dev_started)
1996                 return;
1997
1998         /* verify that port_id is a valid slave of bonded port */
1999         for (i = 0; i < internals->slave_count; i++) {
2000                 if (internals->slaves[i].port_id == port_id) {
2001                         valid_slave = 1;
2002                         break;
2003                 }
2004         }
2005
2006         if (!valid_slave)
2007                 return;
2008
2009         /* Search for port in active port list */
2010         active_pos = find_slave_by_id(internals->active_slaves,
2011                         internals->active_slave_count, port_id);
2012
2013         rte_eth_link_get_nowait(port_id, &link);
2014         if (link.link_status) {
2015                 if (active_pos < internals->active_slave_count)
2016                         return;
2017
2018                 /* if no active slave ports then set this port to be primary port */
2019                 if (internals->active_slave_count < 1) {
2020                         /* If first active slave, then change link status */
2021                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2022                         internals->current_primary_port = port_id;
2023                         lsc_flag = 1;
2024
2025                         mac_address_slaves_update(bonded_eth_dev);
2026
2027                         /* Inherit eth dev link properties from first active slave */
2028                         link_properties_set(bonded_eth_dev,
2029                                         &(slave_eth_dev->data->dev_link));
2030                 } else {
2031                         if (link_properties_valid(
2032                                 &bonded_eth_dev->data->dev_link, &link) != 0) {
2033                                 slave_eth_dev->data->dev_flags &=
2034                                         (~RTE_ETH_DEV_BONDED_SLAVE);
2035                                 RTE_LOG(ERR, PMD,
2036                                         "port %u invalid speed/duplex\n",
2037                                         port_id);
2038                                 return;
2039                         }
2040                 }
2041
2042                 activate_slave(bonded_eth_dev, port_id);
2043
2044                 /* If user has defined the primary port then default to using it */
2045                 if (internals->user_defined_primary_port &&
2046                                 internals->primary_port == port_id)
2047                         bond_ethdev_primary_set(internals, port_id);
2048         } else {
2049                 if (active_pos == internals->active_slave_count)
2050                         return;
2051
2052                 /* Remove from active slave list */
2053                 deactivate_slave(bonded_eth_dev, port_id);
2054
2055                 /* No active slaves, change link status to down and reset other
2056                  * link properties */
2057                 if (internals->active_slave_count < 1) {
2058                         lsc_flag = 1;
2059                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2060
2061                         link_properties_reset(bonded_eth_dev);
2062                 }
2063
2064                 /* Update primary id, take first active slave from list or fall
2065                  * back to the configured primary port if none are available */
2066                 if (port_id == internals->current_primary_port) {
2067                         if (internals->active_slave_count > 0)
2068                                 bond_ethdev_primary_set(internals,
2069                                                 internals->active_slaves[0]);
2070                         else
2071                                 internals->current_primary_port = internals->primary_port;
2072                 }
2073         }
2074
2075         if (lsc_flag) {
2076                 /* Cancel any possible outstanding interrupts if delays are enabled */
2077                 if (internals->link_up_delay_ms > 0 ||
2078                         internals->link_down_delay_ms > 0)
2079                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2080                                         bonded_eth_dev);
2081
2082                 if (bonded_eth_dev->data->dev_link.link_status) {
2083                         if (internals->link_up_delay_ms > 0)
2084                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2085                                                 bond_ethdev_delayed_lsc_propagation,
2086                                                 (void *)bonded_eth_dev);
2087                         else
2088                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2089                                                 RTE_ETH_EVENT_INTR_LSC, NULL);
2090
2091                 } else {
2092                         if (internals->link_down_delay_ms > 0)
2093                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2094                                                 bond_ethdev_delayed_lsc_propagation,
2095                                                 (void *)bonded_eth_dev);
2096                         else
2097                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2098                                                 RTE_ETH_EVENT_INTR_LSC, NULL);
2099                 }
2100         }
2101 }
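
/*
 * Illustrative sketch (not part of the driver): an application-level LSC
 * callback registered on the bonded port; the PMD raises the event from
 * the handler above, possibly via the delayed propagation alarm. The
 * handler body is a hypothetical placeholder.
 */
#if 0
static void
example_lsc_handler(uint8_t port_id, enum rte_eth_event_type type, void *arg)
{
        RTE_SET_USED(arg);

        if (type == RTE_ETH_EVENT_INTR_LSC)
                RTE_LOG(INFO, PMD, "link status changed on port %u\n",
                                port_id);
}

static int
example_register_lsc(uint8_t bonded_port_id)
{
        return rte_eth_dev_callback_register(bonded_port_id,
                        RTE_ETH_EVENT_INTR_LSC, example_lsc_handler, NULL);
}
#endif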
2102
2103 static int
2104 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2105                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2106 {
2107         unsigned i, j;
2108         int result = 0;
2109         int slave_reta_size;
2110         unsigned reta_count;
2111         struct bond_dev_private *internals = dev->data->dev_private;
2112
2113         if (reta_size != internals->reta_size)
2114                 return -EINVAL;
2115
2116         /* Copy RETA table */
2117         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2118
2119         for (i = 0; i < reta_count; i++) {
2120                 internals->reta_conf[i].mask = reta_conf[i].mask;
2121                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2122                         if ((reta_conf[i].mask >> j) & 0x01)
2123                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2124         }
2125
2126         /* Fill rest of array */
2127         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2128                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2129                                 sizeof(internals->reta_conf[0]) * reta_count);
2130
2131         /* Propagate RETA over slaves */
2132         for (i = 0; i < internals->slave_count; i++) {
2133                 slave_reta_size = internals->slaves[i].reta_size;
2134                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2135                                 &internals->reta_conf[0], slave_reta_size);
2136                 if (result < 0)
2137                         return result;
2138         }
2139
2140         return 0;
2141 }
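
/*
 * Illustrative sketch (not part of the driver): building the
 * rte_eth_rss_reta_entry64 array consumed by the update above, spreading
 * a 128-entry RETA across nb_queues Rx queues. The table size is a
 * hypothetical example.
 */
#if 0
static int
example_reta_spread(uint8_t port_id, uint16_t nb_queues)
{
        struct rte_eth_rss_reta_entry64 reta_conf[128 / RTE_RETA_GROUP_SIZE];
        unsigned i, j;

        for (i = 0; i < RTE_DIM(reta_conf); i++) {
                reta_conf[i].mask = ~0ULL;
                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                        reta_conf[i].reta[j] =
                                        (i * RTE_RETA_GROUP_SIZE + j) % nb_queues;
        }

        return rte_eth_dev_rss_reta_update(port_id, reta_conf, 128);
}
#endif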
2142
2143 static int
2144 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2145                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2146 {
2147         int i, j;
2148         struct bond_dev_private *internals = dev->data->dev_private;
2149
2150         if (reta_size != internals->reta_size)
2151                 return -EINVAL;
2152
2153         /* Copy RETA table */
2154         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2155                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2156                         if ((reta_conf[i].mask >> j) & 0x01)
2157                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2158
2159         return 0;
2160 }
2161
2162 static int
2163 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2164                 struct rte_eth_rss_conf *rss_conf)
2165 {
2166         int i, result = 0;
2167         struct bond_dev_private *internals = dev->data->dev_private;
2168         struct rte_eth_rss_conf bond_rss_conf;
2169
2170         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2171
2172         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2173
2174         if (bond_rss_conf.rss_hf != 0)
2175                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2176
2177         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2178                         sizeof(internals->rss_key)) {
2179                 if (bond_rss_conf.rss_key_len == 0)
2180                         bond_rss_conf.rss_key_len = 40;
2181                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2182                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2183                                 internals->rss_key_len);
2184         }
2185
2186         for (i = 0; i < internals->slave_count; i++) {
2187                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2188                                 &bond_rss_conf);
2189                 if (result < 0)
2190                         return result;
2191         }
2192
2193         return 0;
2194 }
2195
2196 static int
2197 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2198                 struct rte_eth_rss_conf *rss_conf)
2199 {
2200         struct bond_dev_private *internals = dev->data->dev_private;
2201
2202         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2203         rss_conf->rss_key_len = internals->rss_key_len;
2204         if (rss_conf->rss_key)
2205                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2206
2207         return 0;
2208 }
2209
2210 const struct eth_dev_ops default_dev_ops = {
2211         .dev_start            = bond_ethdev_start,
2212         .dev_stop             = bond_ethdev_stop,
2213         .dev_close            = bond_ethdev_close,
2214         .dev_configure        = bond_ethdev_configure,
2215         .dev_infos_get        = bond_ethdev_info,
2216         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2217         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2218         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2219         .rx_queue_release     = bond_ethdev_rx_queue_release,
2220         .tx_queue_release     = bond_ethdev_tx_queue_release,
2221         .link_update          = bond_ethdev_link_update,
2222         .stats_get            = bond_ethdev_stats_get,
2223         .stats_reset          = bond_ethdev_stats_reset,
2224         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2225         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2226         .reta_update          = bond_ethdev_rss_reta_update,
2227         .reta_query           = bond_ethdev_rss_reta_query,
2228         .rss_hash_update      = bond_ethdev_rss_hash_update,
2229         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2230 };
2231
2232 static int
2233 bond_probe(const char *name, const char *params)
2234 {
2235         struct bond_dev_private *internals;
2236         struct rte_kvargs *kvlist;
2237         uint8_t bonding_mode, socket_id;
2238         int  arg_count, port_id;
2239
2240         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2241
2242         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2243         if (kvlist == NULL)
2244                 return -1;
2245
2246         /* Parse link bonding mode */
2247         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2248                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2249                                 &bond_ethdev_parse_slave_mode_kvarg,
2250                                 &bonding_mode) != 0) {
2251                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2252                                         name);
2253                         goto parse_error;
2254                 }
2255         } else {
2256                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2257                                 "device %s\n", name);
2258                 goto parse_error;
2259         }
2260
2261         /* Parse socket id to create bonding device on */
2262         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2263         if (arg_count == 1) {
2264                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2265                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2266                                 != 0) {
2267                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2268                                         "bonded device %s\n", name);
2269                         goto parse_error;
2270                 }
2271         } else if (arg_count > 1) {
2272                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2273                                 "bonded device %s\n", name);
2274                 goto parse_error;
2275         } else {
2276                 socket_id = rte_socket_id();
2277         }
2278
2279         /* Create link bonding eth device */
2280         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2281         if (port_id < 0) {
2282                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2283                                 "socket %u.\n", name, bonding_mode, socket_id);
2284                 goto parse_error;
2285         }
2286         internals = rte_eth_devices[port_id].data->dev_private;
2287         internals->kvlist = kvlist;
2288
2289         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2290                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2291         return 0;
2292
2293 parse_error:
2294         rte_kvargs_free(kvlist);
2295
2296         return -1;
2297 }
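
/*
 * Illustrative sketch (not part of the driver): the kvargs pattern used by
 * bond_probe() above -- parse the devargs string against a key list, then
 * run a handler for each occurrence of a key. The key name, handler, and
 * output variable are hypothetical.
 */
#if 0
static int
example_uint8_handler(const char *key __rte_unused, const char *value,
                void *extra_args)
{
        *(uint8_t *)extra_args = (uint8_t)atoi(value);
        return 0;
}

static int
example_parse(const char *params)
{
        static const char *valid_keys[] = { "mode", NULL };
        struct rte_kvargs *kvlist = rte_kvargs_parse(params, valid_keys);
        uint8_t mode = 0;
        int ret = -1;

        if (kvlist == NULL)
                return -1;

        if (rte_kvargs_count(kvlist, "mode") == 1)
                ret = rte_kvargs_process(kvlist, "mode",
                                example_uint8_handler, &mode);

        rte_kvargs_free(kvlist);
        return ret;
}
#endif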
2298
2299 static int
2300 bond_remove(const char *name)
2301 {
2302         int  ret;
2303
2304         if (name == NULL)
2305                 return -EINVAL;
2306
2307         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2308
2309         /* free link bonding eth device */
2310         ret = rte_eth_bond_free(name);
2311         if (ret < 0)
2312                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2313
2314         return ret;
2315 }
2316
2317 /* this part resolves the slave port ids after all the other pdevs and vdevs
2318  * have been allocated */
2319 static int
2320 bond_ethdev_configure(struct rte_eth_dev *dev)
2321 {
2322         char *name = dev->data->name;
2323         struct bond_dev_private *internals = dev->data->dev_private;
2324         struct rte_kvargs *kvlist = internals->kvlist;
2325         int arg_count;
2326         uint8_t port_id = dev - rte_eth_devices;
2327
2328         static const uint8_t default_rss_key[40] = {
2329                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2330                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2331                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2332                 0xBE, 0xAC, 0x01, 0xFA
2333         };
2334
2335         unsigned i, j;
2336
2337         /* If RSS is enabled, fill table and key with default values */
2338         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2339                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2340                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2341                 memcpy(internals->rss_key, default_rss_key, 40);
2342
2343                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2344                         internals->reta_conf[i].mask = ~0LL;
2345                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2346                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2347                 }
2348         }
2349
2350         /* set the max_rx_pktlen */
2351         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2352
2353         /*
2354          * if no kvlist, it means that this bonded device has been created
2355          * through the bonding api.
2356  * through the bonding API.
2357         if (!kvlist)
2358                 return 0;
2359
2360         /* Parse MAC address for bonded device */
2361         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2362         if (arg_count == 1) {
2363                 struct ether_addr bond_mac;
2364
2365                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2366                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2367                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2368                                         name);
2369                         return -1;
2370                 }
2371
2372                 /* Set MAC address */
2373                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2374                         RTE_LOG(ERR, EAL,
2375                                         "Failed to set mac address on bonded device %s\n",
2376                                         name);
2377                         return -1;
2378                 }
2379         } else if (arg_count > 1) {
2380                 RTE_LOG(ERR, EAL,
2381                                 "MAC address can be specified only once for bonded device %s\n",
2382                                 name);
2383                 return -1;
2384         }
2385
2386         /* Parse/set balance mode transmit policy */
2387         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2388         if (arg_count == 1) {
2389                 uint8_t xmit_policy;
2390
2391                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2392                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2393                                                 0) {
2394                         RTE_LOG(INFO, EAL,
2395                                         "Invalid xmit policy specified for bonded device %s\n",
2396                                         name);
2397                         return -1;
2398                 }
2399
2400                 /* Set balance mode transmit policy */
2401                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2402                         RTE_LOG(ERR, EAL,
2403                                         "Failed to set balance xmit policy on bonded device %s\n",
2404                                         name);
2405                         return -1;
2406                 }
2407         } else if (arg_count > 1) {
2408                 RTE_LOG(ERR, EAL,
2409                                 "Transmit policy can be specified only once for bonded device"
2410                                 " %s\n", name);
2411                 return -1;
2412         }
2413
2414         /* Parse/add slave ports to bonded device */
2415         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2416                 struct bond_ethdev_slave_ports slave_ports;
2417                 unsigned i;
2418
2419                 memset(&slave_ports, 0, sizeof(slave_ports));
2420
2421                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2422                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2423                         RTE_LOG(ERR, EAL,
2424                                         "Failed to parse slave ports for bonded device %s\n",
2425                                         name);
2426                         return -1;
2427                 }
2428
2429                 for (i = 0; i < slave_ports.slave_count; i++) {
2430                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2431                                 RTE_LOG(ERR, EAL,
2432                                                 "Failed to add port %d as slave to bonded device %s\n",
2433                                                 slave_ports.slaves[i], name);
2434                         }
2435                 }
2436
2437         } else {
2438                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2439                 return -1;
2440         }
2441
2442         /* Parse/set primary slave port id */
2443         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2444         if (arg_count == 1) {
2445                 uint8_t primary_slave_port_id;
2446
2447                 if (rte_kvargs_process(kvlist,
2448                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2449                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2450                                 &primary_slave_port_id) < 0) {
2451                         RTE_LOG(INFO, EAL,
2452                                         "Invalid primary slave port id specified for bonded device"
2453                                         " %s\n", name);
2454                         return -1;
2455                 }
2456
2457                 /* Set primary slave port id */
2458                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2459                                 != 0) {
2460                         RTE_LOG(ERR, EAL,
2461                                         "Failed to set primary slave port %d on bonded device %s\n",
2462                                         primary_slave_port_id, name);
2463                         return -1;
2464                 }
2465         } else if (arg_count > 1) {
2466                 RTE_LOG(INFO, EAL,
2467                                 "Primary slave can be specified only once for bonded device"
2468                                 " %s\n", name);
2469                 return -1;
2470         }
2471
2472         /* Parse link status monitor polling interval */
2473         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2474         if (arg_count == 1) {
2475                 uint32_t lsc_poll_interval_ms;
2476
2477                 if (rte_kvargs_process(kvlist,
2478                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2479                                 &bond_ethdev_parse_time_ms_kvarg,
2480                                 &lsc_poll_interval_ms) < 0) {
2481                         RTE_LOG(INFO, EAL,
2482                                         "Invalid lsc polling interval value specified for bonded"
2483                                         " device %s\n", name);
2484                         return -1;
2485                 }
2486
2487                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2488                                 != 0) {
2489                         RTE_LOG(ERR, EAL,
2490                                         "Failed to set lsc monitor polling interval (%u ms) on"
2491                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2492                         return -1;
2493                 }
2494         } else if (arg_count > 1) {
2495                 RTE_LOG(INFO, EAL,
2496                                 "LSC polling interval can be specified only once for bonded"
2497                                 " device %s\n", name);
2498                 return -1;
2499         }
2500
2501         /* Parse link up interrupt propagation delay */
2502         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2503         if (arg_count == 1) {
2504                 uint32_t link_up_delay_ms;
2505
2506                 if (rte_kvargs_process(kvlist,
2507                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2508                                 &bond_ethdev_parse_time_ms_kvarg,
2509                                 &link_up_delay_ms) < 0) {
2510                         RTE_LOG(INFO, EAL,
2511                                         "Invalid link up propagation delay value specified for"
2512                                         " bonded device %s\n", name);
2513                         return -1;
2514                 }
2515
2516                 /* Set link up propagation delay */
2517                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2518                                 != 0) {
2519                         RTE_LOG(ERR, EAL,
2520                                         "Failed to set link up propagation delay (%u ms) on bonded"
2521                                         " device %s\n", link_up_delay_ms, name);
2522                         return -1;
2523                 }
2524         } else if (arg_count > 1) {
2525                 RTE_LOG(INFO, EAL,
2526                                 "Link up propagation delay can be specified only once for"
2527                                 " bonded device %s\n", name);
2528                 return -1;
2529         }
2530
2531         /* Parse link down interrupt propagation delay */
2532         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2533         if (arg_count == 1) {
2534                 uint32_t link_down_delay_ms;
2535
2536                 if (rte_kvargs_process(kvlist,
2537                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2538                                 &bond_ethdev_parse_time_ms_kvarg,
2539                                 &link_down_delay_ms) < 0) {
2540                         RTE_LOG(INFO, EAL,
2541                                         "Invalid link down propagation delay value specified for"
2542                                         " bonded device %s\n", name);
2543                         return -1;
2544                 }
2545
2546                 /* Set link down propagation delay */
2547                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2548                                 != 0) {
2549                         RTE_LOG(ERR, EAL,
2550                                         "Failed to set link down propagation delay (%u ms) on"
2551                                         " bonded device %s\n", link_down_delay_ms, name);
2552                         return -1;
2553                 }
2554         } else if (arg_count > 1) {
2555                 RTE_LOG(INFO, EAL,
2556                                 "Link down propagation delay can be specified only once for"
2557                                 " bonded device %s\n", name);
2558                 return -1;
2559         }
2560
2561         return 0;
2562 }
2563
2564 struct rte_vdev_driver pmd_bond_drv = {
2565         .probe = bond_probe,
2566         .remove = bond_remove,
2567 };
2568
2569 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
2570 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
2571
2572 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
2573         "slave=<ifc> "
2574         "primary=<ifc> "
2575         "mode=[0-6] "
2576         "xmit_policy=[l2 | l23 | l34] "
2577         "socket_id=<int> "
2578         "mac=<mac addr> "
2579         "lsc_poll_period_ms=<int> "
2580         "up_delay=<int> "
2581         "down_delay=<int>");
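
/*
 * Example (illustrative) vdev argument matching the parameter string above;
 * the slave device names and values are hypothetical and the line is
 * wrapped here for readability:
 *
 *   --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,
 *           xmit_policy=l34,socket_id=0,lsc_poll_period_ms=100'
 */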