bonding: remove fake pci interface
drivers/net/bonding/rte_eth_bond_pmd.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}
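
/*
 * Illustrative usage sketch (not upstream code): get_vlan_offset() skips up
 * to two VLAN tags (single-tagged and QinQ frames) and leaves *proto holding
 * the inner EtherType, so a caller can locate the L3 header like this:
 *
 *   uint16_t proto = eth_hdr->ether_type;
 *   size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
 *
 *   if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *           struct ipv4_hdr *ip4 = (struct ipv4_hdr *)
 *                           ((char *)(eth_hdr + 1) + vlan_offset);
 *           ...
 *   }
 */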

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint8_t slaves[RTE_MAX_ETHPORTS];
        uint8_t slave_count;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        /* Remove packet from array if it is a slow packet, or if the
                         * slave is not in collecting state, or if the bonding
                         * interface is not in promiscuous mode and the packet's
                         * destination address does not match. */
                        if (unlikely(hdr->ether_type == ether_type_slow_be ||
                                !collecting || (!promisc &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
                                                bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
        }

        return num_rx_total;
}
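
/*
 * Note: the loop above filters in place. Packets consumed by the LACP state
 * machine (slow frames) or dropped are removed from bufs[] with memmove(),
 * so on return bufs[0..num_rx_total-1] is a dense array holding only the
 * data packets the application should see.
 */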

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint8_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Just update the RX/TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and increment stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}
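
/*
 * Caller-side sketch (illustrative only): this TX path returns the number of
 * packets actually sent and moves any unsent mbufs to the tail of bufs[], so
 * an application can retry or free the remainder starting at the returned
 * count:
 *
 *   uint16_t sent = rte_eth_tx_burst(bond_port_id, 0, bufs, nb_pkts);
 *   while (sent < nb_pkts)
 *           rte_pktmbuf_free(bufs[sent++]);
 */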

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                IPV4_IHL_MULTIPLIER;

                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                        ip_hdr_offset);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                        ip_hdr_offset);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

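/*
 * Selection sketch (illustrative): the balance and 802.3ad TX paths pick a
 * slave by calling the configured hash through internals->xmit_hash, e.g.:
 *
 *   op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
 *   slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
 *
 * xmit_l2_hash, xmit_l23_hash and xmit_l34_hash share this signature, so
 * the TX policy can be switched by changing a single function pointer.
 */
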
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

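/*
 * Note: rte_eal_alarm_set() takes its timeout in microseconds, so
 * REORDER_PERIOD_MS * 1000 re-arms this callback every 10 ms. Each run
 * re-sorts the active slaves by estimated remaining bandwidth (via
 * bandwidth_cmp) so bond_ethdev_tx_burst_tlb() tries the least-loaded slave
 * first; bond_tlb_enable() below starts the cycle and bond_tlb_disable()
 * cancels it.
 */
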
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint8_t i, j;

        uint8_t num_of_slaves = internals->active_slave_count;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint8_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
                num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
        /* Positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate room for additional slow-protocol packets in 802.3ad mode. */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate each slave's mbuf array with the packets to be sent on it */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}
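
/*
 * Note: slow-protocol frames (LACPDUs, marker PDUs) dequeued from each
 * port's tx_ring are placed at the front of that slave's buffer, ahead of
 * data packets, so state-machine traffic is transmitted first. They are
 * excluded from num_tx_total and, if the burst fails, are freed rather than
 * handed back to the caller, since the application never owned them.
 */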

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* Record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* If slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}
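
/*
 * Note: rte_mbuf_refcnt_update() bumps each mbuf's reference count by
 * num_of_slaves - 1 before the loop, so the same mbuf can be passed to
 * rte_eth_tx_burst() once per slave; each successful transmit consumes one
 * reference and the buffer only returns to its pool after the last slave
 * has sent it. rte_pktmbuf_free() in the failure path releases the
 * references held for slaves that did not send a given packet.
 */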

void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link)
{
        struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (slave_dev_link->link_status &&
                bonded_eth_dev->data->dev_started) {
                bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
                bonded_dev_link->link_speed = slave_dev_link->link_speed;

                internals->link_props_set = 1;
        }
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        memset(&(bonded_eth_dev->data->dev_link), 0,
                        sizeof(bonded_eth_dev->data->dev_link));

        internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
                struct rte_eth_link *slave_dev_link)
{
        if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
                bonded_dev_link->link_speed != slave_dev_link->link_speed)
                return -1;

        return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC is different from the current MAC then update */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (mac_address_set(&rte_eth_devices[internals->primary_port],
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (mac_address_set(
                                                &rte_eth_devices[internals->slaves[i].port_id],
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                RTE_LOG(WARNING, PMD,
                                "Using mode 4, it is necessary to do TX burst and RX burst "
                                "at least every 100ms.\n");
                break;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)
                        return -1;

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}
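
/*
 * Usage sketch (illustrative, assuming the public API declared in
 * rte_eth_bond.h): an application normally reaches this function through
 * rte_eth_bond_mode_set() after creating the bonded device, e.g.:
 *
 *   int port = rte_eth_bond_create("bond0", BONDING_MODE_BALANCE, 0);
 *   if (port >= 0)
 *           rte_eth_bond_mode_set(port, BONDING_MODE_8023AD);
 *
 * Each mode simply swaps the rx_pkt_burst/tx_pkt_burst function pointers on
 * the ethdev, so the datapath needs no per-packet mode branch.
 */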
1295
1296 int
1297 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1298                 struct rte_eth_dev *slave_eth_dev)
1299 {
1300         struct bond_rx_queue *bd_rx_q;
1301         struct bond_tx_queue *bd_tx_q;
1302
1303         int errval;
1304         uint16_t q_id;
1305
1306         /* Stop slave */
1307         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1308
1309         /* Enable interrupts on slave device if supported */
1310         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1311                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1312
1313         /* If RSS is enabled for bonding, try to enable it for slaves  */
1314         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1315                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1316                                 != 0) {
1317                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1318                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1319                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1320                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1321                 } else {
1322                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1323                 }
1324
1325                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1326                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1327                 slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
1328         }
1329
1330         /* Configure device */
1331         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1332                         bonded_eth_dev->data->nb_rx_queues,
1333                         bonded_eth_dev->data->nb_tx_queues,
1334                         &(slave_eth_dev->data->dev_conf));
1335         if (errval != 0) {
1336                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1337                                 slave_eth_dev->data->port_id, errval);
1338                 return errval;
1339         }
1340
1341         /* Setup Rx Queues */
1342         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1343                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1344
1345                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1346                                 bd_rx_q->nb_rx_desc,
1347                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1348                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1349                 if (errval != 0) {
1350                         RTE_BOND_LOG(ERR,
1351                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1352                                         slave_eth_dev->data->port_id, q_id, errval);
1353                         return errval;
1354                 }
1355         }
1356
1357         /* Setup Tx Queues */
1358         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1359                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1360
1361                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1362                                 bd_tx_q->nb_tx_desc,
1363                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1364                                 &bd_tx_q->tx_conf);
1365                 if (errval != 0) {
1366                         RTE_BOND_LOG(ERR,
1367                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1368                                         slave_eth_dev->data->port_id, q_id, errval);
1369                         return errval;
1370                 }
1371         }
1372
1373         /* Start device */
1374         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1375         if (errval != 0) {
1376                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1377                                 slave_eth_dev->data->port_id, errval);
1378                 return -1;
1379         }
1380
1381         /* If RSS is enabled for bonding, synchronize RETA */
1382         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1383                 int i;
1384                 struct bond_dev_private *internals;
1385
1386                 internals = bonded_eth_dev->data->dev_private;
1387
1388                 for (i = 0; i < internals->slave_count; i++) {
1389                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1390                                 errval = rte_eth_dev_rss_reta_update(
1391                                                 slave_eth_dev->data->port_id,
1392                                                 &internals->reta_conf[0],
1393                                                 internals->slaves[i].reta_size);
1394                                 if (errval != 0) {
1395                                         RTE_LOG(WARNING, PMD,
1396                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1397                                                         " RSS configuration for bonding may be inconsistent.\n",
1398                                                         slave_eth_dev->data->port_id, errval);
1399                                 }
1400                                 break;
1401                         }
1402                 }
1403         }
1404
1405         /* If lsc interrupt is set, check initial slave's link status */
1406         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1407                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1408                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1409
1410         return 0;
1411 }
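/*
 * slave_configure() replays the standard ethdev bring-up sequence on a
 * slave. The equivalent application-side sequence for a standalone port
 * (sketch; assumes one rx/tx queue, a prepared "port_conf" and an
 * existing mempool "mb_pool"):
 *
 *	rte_eth_dev_configure(port_id, 1, 1, &port_conf);
 *	rte_eth_rx_queue_setup(port_id, 0, 128,
 *			rte_eth_dev_socket_id(port_id), NULL, mb_pool);
 *	rte_eth_tx_queue_setup(port_id, 0, 512,
 *			rte_eth_dev_socket_id(port_id), NULL);
 *	rte_eth_dev_start(port_id);
 */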
1412
1413 void
1414 slave_remove(struct bond_dev_private *internals,
1415                 struct rte_eth_dev *slave_eth_dev)
1416 {
1417         uint8_t i;
1418
1419         for (i = 0; i < internals->slave_count; i++)
1420                 if (internals->slaves[i].port_id ==
1421                                 slave_eth_dev->data->port_id)
1422                         break;
1423
1424         if (i < (internals->slave_count - 1))
1425                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1426                                 sizeof(internals->slaves[0]) *
1427                                 (internals->slave_count - i - 1));
1428
1429         internals->slave_count--;
1430 }
1431
1432 static void
1433 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1434
1435 void
1436 slave_add(struct bond_dev_private *internals,
1437                 struct rte_eth_dev *slave_eth_dev)
1438 {
1439         struct bond_slave_details *slave_details =
1440                         &internals->slaves[internals->slave_count];
1441
1442         slave_details->port_id = slave_eth_dev->data->port_id;
1443         slave_details->last_link_status = 0;
1444
1445         /* If slave device doesn't support interrupts then we need to enable
1446          * polling to monitor link status */
1447         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1448                 slave_details->link_status_poll_enabled = 1;
1449
1450                 if (!internals->link_status_polling_enabled) {
1451                         internals->link_status_polling_enabled = 1;
1452
1453                         rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1454                                         bond_ethdev_slave_link_status_change_monitor,
1455                                         (void *)&rte_eth_devices[internals->port_id]);
1456                 }
1457         }
1458
1459         slave_details->link_status_wait_to_complete = 0;
1460         /* Persist the slave's original MAC address so it can be restored later */
1461         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1462                         sizeof(struct ether_addr));
1463 }
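/*
 * slave_add() is reached through the public API; a sketch of enslaving
 * two already-probed ports to an existing bonded device:
 *
 *	if (rte_eth_bond_slave_add(bonded_port_id, slave0_port_id) != 0 ||
 *			rte_eth_bond_slave_add(bonded_port_id, slave1_port_id) != 0)
 *		rte_exit(EXIT_FAILURE, "failed to add slaves\n");
 */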
1464
1465 void
1466 bond_ethdev_primary_set(struct bond_dev_private *internals,
1467                 uint8_t slave_port_id)
1468 {
1469         int i;
1470
1471         if (internals->active_slave_count < 1)
1472                 internals->current_primary_port = slave_port_id;
1473         else
1474                 /* Search bonded device slave ports for new proposed primary port */
1475                 for (i = 0; i < internals->active_slave_count; i++) {
1476                         if (internals->active_slaves[i] == slave_port_id)
1477                                 internals->current_primary_port = slave_port_id;
1478                 }
1479 }
1480
1481 static void
1482 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1483
1484 static int
1485 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1486 {
1487         struct bond_dev_private *internals;
1488         int i;
1489
1490         /* slave eth dev will be started by bonded device */
1491         if (check_for_bonded_ethdev(eth_dev)) {
1492                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1493                                 eth_dev->data->port_id);
1494                 return -1;
1495         }
1496
1497         eth_dev->data->dev_link.link_status = 0;
1498         eth_dev->data->dev_started = 1;
1499
1500         internals = eth_dev->data->dev_private;
1501
1502         if (internals->slave_count == 0) {
1503                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1504                 return -1;
1505         }
1506
1507         if (internals->user_defined_mac == 0) {
1508                 struct ether_addr *new_mac_addr = NULL;
1509
1510                 for (i = 0; i < internals->slave_count; i++)
1511                         if (internals->slaves[i].port_id == internals->primary_port)
1512                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1513
1514                 if (new_mac_addr == NULL)
1515                         return -1;
1516
1517                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1518                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1519                                         eth_dev->data->port_id);
1520                         return -1;
1521                 }
1522         }
1523
1524         /* Update all slave devices' MAC addresses */
1525         if (mac_address_slaves_update(eth_dev) != 0)
1526                 return -1;
1527
1528         /* If bonded device is configured in promiscuous mode then re-apply it */
1529         if (internals->promiscuous_en)
1530                 bond_ethdev_promiscuous_enable(eth_dev);
1531
1532         /* Reconfigure each slave device if starting bonded device */
1533         for (i = 0; i < internals->slave_count; i++) {
1534                 if (slave_configure(eth_dev,
1535                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1536                         RTE_BOND_LOG(ERR,
1537                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1538                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1539                         return -1;
1540                 }
1541         }
1542
1543         if (internals->user_defined_primary_port)
1544                 bond_ethdev_primary_set(internals, internals->primary_port);
1545
1546         if (internals->mode == BONDING_MODE_8023AD)
1547                 bond_mode_8023ad_start(eth_dev);
1548
1549         if (internals->mode == BONDING_MODE_TLB ||
1550                         internals->mode == BONDING_MODE_ALB)
1551                 bond_tlb_enable(internals);
1552
1553         return 0;
1554 }
1555
1556 static void
1557 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1558 {
1559         uint8_t i;
1560
1561         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1562                 rte_free(dev->data->rx_queues[i]);
1563                 dev->data->rx_queues[i] = NULL;
1564         }
1565         dev->data->nb_rx_queues = 0;
1566
1567         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1568                 rte_free(dev->data->tx_queues[i]);
1569                 dev->data->tx_queues[i] = NULL;
1570         }
1571         dev->data->nb_tx_queues = 0;
1572 }
1573
1574 void
1575 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1576 {
1577         struct bond_dev_private *internals = eth_dev->data->dev_private;
1578         uint8_t i;
1579
1580         if (internals->mode == BONDING_MODE_8023AD) {
1581                 struct port *port;
1582                 void *pkt = NULL;
1583
1584                 bond_mode_8023ad_stop(eth_dev);
1585
1586                 /* Discard all messages to/from mode 4 state machines */
1587                 for (i = 0; i < internals->active_slave_count; i++) {
1588                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1589
1590                         RTE_VERIFY(port->rx_ring != NULL);
1591                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1592                                 rte_pktmbuf_free(pkt);
1593
1594                         RTE_VERIFY(port->tx_ring != NULL);
1595                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1596                                 rte_pktmbuf_free(pkt);
1597                 }
1598         }
1599
1600         if (internals->mode == BONDING_MODE_TLB ||
1601                         internals->mode == BONDING_MODE_ALB) {
1602                 bond_tlb_disable(internals);
1603                 for (i = 0; i < internals->active_slave_count; i++)
1604                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1605         }
1606
1607         internals->active_slave_count = 0;
1608         internals->link_status_polling_enabled = 0;
1609
1610         eth_dev->data->dev_link.link_status = 0;
1611         eth_dev->data->dev_started = 0;
1612 }
1613
1614 void
1615 bond_ethdev_close(struct rte_eth_dev *dev)
1616 {
1617         bond_ethdev_free_queues(dev);
1618 }
1619
1620 /* forward declaration */
1621 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1622
1623 static void
1624 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1625 {
1626         struct bond_dev_private *internals = dev->data->dev_private;
1627
1628         dev_info->max_mac_addrs = 1;
1629
1630         dev_info->max_rx_pktlen = (uint32_t)2048;
1631
1632         dev_info->max_rx_queues = (uint16_t)128;
1633         dev_info->max_tx_queues = (uint16_t)512;
1634
1635         dev_info->min_rx_bufsize = 0;
1636         dev_info->pci_dev = NULL;
1637
1638         dev_info->rx_offload_capa = internals->rx_offload_capa;
1639         dev_info->tx_offload_capa = internals->tx_offload_capa;
1640         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1641
1642         dev_info->reta_size = internals->reta_size;
1643 }
1644
1645 static int
1646 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1647                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1648                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1649 {
1650         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1651                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1652                                         0, dev->data->numa_node);
1653         if (bd_rx_q == NULL)
1654                 return -1;
1655
1656         bd_rx_q->queue_id = rx_queue_id;
1657         bd_rx_q->dev_private = dev->data->dev_private;
1658
1659         bd_rx_q->nb_rx_desc = nb_rx_desc;
1660
1661         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1662         bd_rx_q->mb_pool = mb_pool;
1663
1664         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1665
1666         return 0;
1667 }
1668
1669 static int
1670 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1671                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1672                 const struct rte_eth_txconf *tx_conf)
1673 {
1674         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1675                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1676                                         0, dev->data->numa_node);
1677
1678         if (bd_tx_q == NULL)
1679                 return -1;
1680
1681         bd_tx_q->queue_id = tx_queue_id;
1682         bd_tx_q->dev_private = dev->data->dev_private;
1683
1684         bd_tx_q->nb_tx_desc = nb_tx_desc;
1685         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1686
1687         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1688
1689         return 0;
1690 }
1691
1692 static void
1693 bond_ethdev_rx_queue_release(void *queue)
1694 {
1695         if (queue == NULL)
1696                 return;
1697
1698         rte_free(queue);
1699 }
1700
1701 static void
1702 bond_ethdev_tx_queue_release(void *queue)
1703 {
1704         if (queue == NULL)
1705                 return;
1706
1707         rte_free(queue);
1708 }
1709
1710 static void
1711 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1712 {
1713         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1714         struct bond_dev_private *internals;
1715
1716         /* Default value for polling slave found is true as we don't want to
1717          * disable the polling thread if we cannot get the lock */
1718         int i, polling_slave_found = 1;
1719
1720         if (cb_arg == NULL)
1721                 return;
1722
1723         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1724         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1725
1726         if (!bonded_ethdev->data->dev_started ||
1727                 !internals->link_status_polling_enabled)
1728                 return;
1729
1730         /* If device is currently being configured then don't check slave link
1731          * status; wait until the next period */
1732         if (rte_spinlock_trylock(&internals->lock)) {
1733                 if (internals->slave_count > 0)
1734                         polling_slave_found = 0;
1735
1736                 for (i = 0; i < internals->slave_count; i++) {
1737                         if (!internals->slaves[i].link_status_poll_enabled)
1738                                 continue;
1739
1740                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1741                         polling_slave_found = 1;
1742
1743                         /* Update slave link status */
1744                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1745                                         internals->slaves[i].link_status_wait_to_complete);
1746
1747                         /* if link status has changed since last checked then call lsc
1748                          * event callback */
1749                         if (slave_ethdev->data->dev_link.link_status !=
1750                                         internals->slaves[i].last_link_status) {
1751                                 internals->slaves[i].last_link_status =
1752                                                 slave_ethdev->data->dev_link.link_status;
1753
1754                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1755                                                 RTE_ETH_EVENT_INTR_LSC,
1756                                                 &bonded_ethdev->data->port_id);
1757                         }
1758                 }
1759                 rte_spinlock_unlock(&internals->lock);
1760         }
1761
1762         if (polling_slave_found)
1763                 /* Set alarm to continue monitoring link status of slave ethdevs */
1764                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1765                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1766 }
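/*
 * The monitor above re-arms itself with rte_eal_alarm_set(), giving a
 * periodic poll without a dedicated thread. The same pattern in isolation
 * (sketch; poll_cb and do_periodic_work are hypothetical names, interval
 * is in microseconds):
 *
 *	static void poll_cb(void *arg)
 *	{
 *		do_periodic_work(arg);
 *		rte_eal_alarm_set(10 * 1000, poll_cb, arg);
 *	}
 *
 *	rte_eal_alarm_set(10 * 1000, poll_cb, NULL);
 */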
1767
1768 static int
1769 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1770                 int wait_to_complete)
1771 {
1772         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1773
1774         if (!bonded_eth_dev->data->dev_started ||
1775                 internals->active_slave_count == 0) {
1776                 bonded_eth_dev->data->dev_link.link_status = 0;
1777                 return 0;
1778         } else {
1779                 struct rte_eth_dev *slave_eth_dev;
1780                 int i, link_up = 0;
1781
1782                 for (i = 0; i < internals->active_slave_count; i++) {
1783                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1784
1785                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1786                                         wait_to_complete);
1787                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1788                                 link_up = 1;
1789                                 break;
1790                         }
1791                 }
1792
1793                 bonded_eth_dev->data->dev_link.link_status = link_up;
1794         }
1795
1796         return 0;
1797 }
1798
1799 static void
1800 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1801 {
1802         struct bond_dev_private *internals = dev->data->dev_private;
1803         struct rte_eth_stats slave_stats;
1804         int i, j;
1805
1806         for (i = 0; i < internals->slave_count; i++) {
1807                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1808
1809                 stats->ipackets += slave_stats.ipackets;
1810                 stats->opackets += slave_stats.opackets;
1811                 stats->ibytes += slave_stats.ibytes;
1812                 stats->obytes += slave_stats.obytes;
1813                 stats->ierrors += slave_stats.ierrors;
1814                 stats->oerrors += slave_stats.oerrors;
1815                 stats->imcasts += slave_stats.imcasts;
1816                 stats->rx_nombuf += slave_stats.rx_nombuf;
1817                 stats->fdirmatch += slave_stats.fdirmatch;
1818                 stats->fdirmiss += slave_stats.fdirmiss;
1819                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1820                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1821                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1822                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1823
1824                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1825                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1826                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1827                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1828                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1829                         stats->q_errors[j] += slave_stats.q_errors[j];
1830                 }
1831
1832         }
1833 }
1834
1835 static void
1836 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1837 {
1838         struct bond_dev_private *internals = dev->data->dev_private;
1839         int i;
1840
1841         for (i = 0; i < internals->slave_count; i++)
1842                 rte_eth_stats_reset(internals->slaves[i].port_id);
1843 }
1844
1845 static void
1846 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1847 {
1848         struct bond_dev_private *internals = eth_dev->data->dev_private;
1849         int i;
1850
1851         internals->promiscuous_en = 1;
1852
1853         switch (internals->mode) {
1854         /* Promiscuous mode is propagated to all slaves */
1855         case BONDING_MODE_ROUND_ROBIN:
1856         case BONDING_MODE_BALANCE:
1857         case BONDING_MODE_BROADCAST:
1858                 for (i = 0; i < internals->slave_count; i++)
1859                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1860                 break;
1861         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1862         case BONDING_MODE_8023AD:
1863                 break;
1864         /* Promiscuous mode is propagated only to primary slave */
1865         case BONDING_MODE_ACTIVE_BACKUP:
1866         case BONDING_MODE_TLB:
1867         case BONDING_MODE_ALB:
1868         default:
1869                 rte_eth_promiscuous_enable(internals->current_primary_port);
1870         }
1871 }
1872
1873 static void
1874 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1875 {
1876         struct bond_dev_private *internals = dev->data->dev_private;
1877         int i;
1878
1879         internals->promiscuous_en = 0;
1880
1881         switch (internals->mode) {
1882         /* Promiscuous mode is propagated to all slaves */
1883         case BONDING_MODE_ROUND_ROBIN:
1884         case BONDING_MODE_BALANCE:
1885         case BONDING_MODE_BROADCAST:
1886                 for (i = 0; i < internals->slave_count; i++)
1887                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1888                 break;
1889         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1890         case BONDING_MODE_8023AD:
1891                 break;
1892         /* Promiscuous mode is propagated only to primary slave */
1893         case BONDING_MODE_ACTIVE_BACKUP:
1894         case BONDING_MODE_TLB:
1895         case BONDING_MODE_ALB:
1896         default:
1897                 rte_eth_promiscuous_disable(internals->current_primary_port);
1898         }
1899 }
1900
1901 static void
1902 bond_ethdev_delayed_lsc_propagation(void *arg)
1903 {
1904         if (arg == NULL)
1905                 return;
1906
1907         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1908                         RTE_ETH_EVENT_INTR_LSC);
1909 }
1910
1911 void
1912 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1913                 void *param)
1914 {
1915         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1916         struct bond_dev_private *internals;
1917         struct rte_eth_link link;
1918
1919         int i, valid_slave = 0;
1920         uint8_t active_pos;
1921         uint8_t lsc_flag = 0;
1922
1923         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1924                 return;
1925
1926         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1927         slave_eth_dev = &rte_eth_devices[port_id];
1928
1929         if (check_for_bonded_ethdev(bonded_eth_dev))
1930                 return;
1931
1932         internals = bonded_eth_dev->data->dev_private;
1933
1934         /* If the device isn't started don't handle interrupts */
1935         if (!bonded_eth_dev->data->dev_started)
1936                 return;
1937
1938         /* verify that port_id is a valid slave of bonded port */
1939         for (i = 0; i < internals->slave_count; i++) {
1940                 if (internals->slaves[i].port_id == port_id) {
1941                         valid_slave = 1;
1942                         break;
1943                 }
1944         }
1945
1946         if (!valid_slave)
1947                 return;
1948
1949         /* Search for port in active port list */
1950         active_pos = find_slave_by_id(internals->active_slaves,
1951                         internals->active_slave_count, port_id);
1952
1953         rte_eth_link_get_nowait(port_id, &link);
1954         if (link.link_status) {
1955                 if (active_pos < internals->active_slave_count)
1956                         return;
1957
1958                 /* if no active slave ports then set this port to be primary port */
1959                 if (internals->active_slave_count < 1) {
1960                         /* If first active slave, then change link status */
1961                         bonded_eth_dev->data->dev_link.link_status = 1;
1962                         internals->current_primary_port = port_id;
1963                         lsc_flag = 1;
1964
1965                         mac_address_slaves_update(bonded_eth_dev);
1966
1967                         /* Inherit eth dev link properties from first active slave */
1968                         link_properties_set(bonded_eth_dev,
1969                                         &(slave_eth_dev->data->dev_link));
1970                 }
1971
1972                 activate_slave(bonded_eth_dev, port_id);
1973
1974                 /* If user has defined the primary port then default to using it */
1975                 if (internals->user_defined_primary_port &&
1976                                 internals->primary_port == port_id)
1977                         bond_ethdev_primary_set(internals, port_id);
1978         } else {
1979                 if (active_pos == internals->active_slave_count)
1980                         return;
1981
1982                 /* Remove from active slave list */
1983                 deactivate_slave(bonded_eth_dev, port_id);
1984
1985                 /* No active slaves, change link status to down and reset other
1986                  * link properties */
1987                 if (internals->active_slave_count < 1) {
1988                         lsc_flag = 1;
1989                         bonded_eth_dev->data->dev_link.link_status = 0;
1990
1991                         link_properties_reset(bonded_eth_dev);
1992                 }
1993
1994                 /* Update primary id: take the first active slave from the list, or
1995                  * fall back to the configured primary port if none are available */
1996                 if (port_id == internals->current_primary_port) {
1997                         if (internals->active_slave_count > 0)
1998                                 bond_ethdev_primary_set(internals,
1999                                                 internals->active_slaves[0]);
2000                         else
2001                                 internals->current_primary_port = internals->primary_port;
2002                 }
2003         }
2004
2005         if (lsc_flag) {
2006                 /* Cancel any possible outstanding interrupts if delays are enabled */
2007                 if (internals->link_up_delay_ms > 0 ||
2008                         internals->link_down_delay_ms > 0)
2009                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2010                                         bonded_eth_dev);
2011
2012                 if (bonded_eth_dev->data->dev_link.link_status) {
2013                         if (internals->link_up_delay_ms > 0)
2014                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2015                                                 bond_ethdev_delayed_lsc_propagation,
2016                                                 (void *)bonded_eth_dev);
2017                         else
2018                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2019                                                 RTE_ETH_EVENT_INTR_LSC);
2020
2021                 } else {
2022                         if (internals->link_down_delay_ms > 0)
2023                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2024                                                 bond_ethdev_delayed_lsc_propagation,
2025                                                 (void *)bonded_eth_dev);
2026                         else
2027                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2028                                                 RTE_ETH_EVENT_INTR_LSC);
2029                 }
2030         }
2031 }
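/*
 * The bonding driver receives RTE_ETH_EVENT_INTR_LSC from its slaves in
 * the callback above; an application can observe the bonded port's own
 * link transitions the same way (sketch; on_bond_lsc is a hypothetical
 * handler matching rte_eth_dev_cb_fn):
 *
 *	rte_eth_dev_callback_register(bonded_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			on_bond_lsc, NULL);
 */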
2032
2033 static int
2034 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2035                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2036 {
2037         unsigned i, j;
2038         int result = 0;
2039         int slave_reta_size;
2040         unsigned reta_count;
2041         struct bond_dev_private *internals = dev->data->dev_private;
2042
2043         if (reta_size != internals->reta_size)
2044                 return -EINVAL;
2045
2046          /* Copy RETA table */
2047         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2048
2049         for (i = 0; i < reta_count; i++) {
2050                 internals->reta_conf[i].mask = reta_conf[i].mask;
2051                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2052                         if ((reta_conf[i].mask >> j) & 0x01)
2053                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2054         }
2055
2056         /* Fill rest of array */
2057         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2058                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2059                                 sizeof(internals->reta_conf[0]) * reta_count);
2060
2061         /* Propagate RETA over slaves */
2062         for (i = 0; i < internals->slave_count; i++) {
2063                 slave_reta_size = internals->slaves[i].reta_size;
2064                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2065                                 &internals->reta_conf[0], slave_reta_size);
2066                 if (result < 0)
2067                         return result;
2068         }
2069
2070         return 0;
2071 }
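/*
 * Application-side sketch of pushing a RETA that alternates between two
 * queues to the bonded port (assumes the bonded reta_size is 128):
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[128 / RTE_RETA_GROUP_SIZE];
 *	unsigned k, n;
 *
 *	memset(reta_conf, 0, sizeof(reta_conf));
 *	for (k = 0; k < RTE_DIM(reta_conf); k++) {
 *		reta_conf[k].mask = ~0ULL;
 *		for (n = 0; n < RTE_RETA_GROUP_SIZE; n++)
 *			reta_conf[k].reta[n] = n % 2;
 *	}
 *	rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, 128);
 */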
2072
2073 static int
2074 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2075                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2076 {
2077         int i, j;
2078         struct bond_dev_private *internals = dev->data->dev_private;
2079
2080         if (reta_size != internals->reta_size)
2081                 return -EINVAL;
2082
2083          /* Copy RETA table */
2084         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2085                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2086                         if ((reta_conf[i].mask >> j) & 0x01)
2087                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2088
2089         return 0;
2090 }
2091
2092 static int
2093 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2094                 struct rte_eth_rss_conf *rss_conf)
2095 {
2096         int i, result = 0;
2097         struct bond_dev_private *internals = dev->data->dev_private;
2098         struct rte_eth_rss_conf bond_rss_conf;
2099
2100         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2101
2102         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2103
2104         if (bond_rss_conf.rss_hf != 0)
2105                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2106
2107         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2108                         sizeof(internals->rss_key)) {
2109                 if (bond_rss_conf.rss_key_len == 0)
2110                         bond_rss_conf.rss_key_len = 40;
2111                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2112                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2113                                 internals->rss_key_len);
2114         }
2115
2116         for (i = 0; i < internals->slave_count; i++) {
2117                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2118                                 &bond_rss_conf);
2119                 if (result < 0)
2120                         return result;
2121         }
2122
2123         return 0;
2124 }
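/*
 * Application-side sketch of narrowing the bonded port's RSS hash types
 * while keeping the current key (rss_key left NULL so the key is not
 * touched, mirroring the check above):
 *
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = NULL,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bonded_port_id, &rss_conf);
 */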
2125
2126 static int
2127 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2128                 struct rte_eth_rss_conf *rss_conf)
2129 {
2130         struct bond_dev_private *internals = dev->data->dev_private;
2131
2132         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2133         rss_conf->rss_key_len = internals->rss_key_len;
2134         if (rss_conf->rss_key)
2135                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2136
2137         return 0;
2138 }
2139
2140 struct eth_dev_ops default_dev_ops = {
2141                 .dev_start            = bond_ethdev_start,
2142                 .dev_stop             = bond_ethdev_stop,
2143                 .dev_close            = bond_ethdev_close,
2144                 .dev_configure        = bond_ethdev_configure,
2145                 .dev_infos_get        = bond_ethdev_info,
2146                 .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2147                 .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2148                 .rx_queue_release     = bond_ethdev_rx_queue_release,
2149                 .tx_queue_release     = bond_ethdev_tx_queue_release,
2150                 .link_update          = bond_ethdev_link_update,
2151                 .stats_get            = bond_ethdev_stats_get,
2152                 .stats_reset          = bond_ethdev_stats_reset,
2153                 .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2154                 .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2155                 .reta_update          = bond_ethdev_rss_reta_update,
2156                 .reta_query           = bond_ethdev_rss_reta_query,
2157                 .rss_hash_update      = bond_ethdev_rss_hash_update,
2158                 .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2159 };
2160
2161 static int
2162 bond_init(const char *name, const char *params)
2163 {
2164         struct bond_dev_private *internals;
2165         struct rte_kvargs *kvlist;
2166         uint8_t bonding_mode, socket_id;
2167         int  arg_count, port_id;
2168
2169         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2170
2171         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2172         if (kvlist == NULL)
2173                 return -1;
2174
2175         /* Parse link bonding mode */
2176         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2177                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2178                                 &bond_ethdev_parse_slave_mode_kvarg,
2179                                 &bonding_mode) != 0) {
2180                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2181                                         name);
2182                         goto parse_error;
2183                 }
2184         } else {
2185                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2186                                 "device %s\n", name);
2187                 goto parse_error;
2188         }
2189
2190         /* Parse socket id to create bonding device on */
2191         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2192         if (arg_count == 1) {
2193                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2194                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2195                                 != 0) {
2196                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2197                                         "bonded device %s\n", name);
2198                         goto parse_error;
2199                 }
2200         } else if (arg_count > 1) {
2201                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2202                                 "bonded device %s\n", name);
2203                 goto parse_error;
2204         } else {
2205                 socket_id = rte_socket_id();
2206         }
2207
2208         /* Create link bonding eth device */
2209         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2210         if (port_id < 0) {
2211                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2212                                 "socket %u.\n", name, bonding_mode, socket_id);
2213                 goto parse_error;
2214         }
2215         internals = rte_eth_devices[port_id].data->dev_private;
2216         internals->kvlist = kvlist;
2217
2218         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2219                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2220         return 0;
2221
2222 parse_error:
2223         rte_kvargs_free(kvlist);
2224
2225         return -1;
2226 }
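/*
 * The kvargs parsed above arrive via the EAL --vdev option, e.g. (sketch;
 * slave designators depend on the ports probed on the target system):
 *
 *	./app -c 0xf -n 4 \
 *		--vdev 'eth_bond0,mode=2,slave=0000:04:00.0,slave=0000:04:00.1,socket_id=0'
 */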
2227
2228 static int
2229 bond_uninit(const char *name)
2230 {
2231         int  ret;
2232
2233         if (name == NULL)
2234                 return -EINVAL;
2235
2236         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2237
2238         /* free link bonding eth device */
2239         ret = rte_eth_bond_free(name);
2240         if (ret < 0)
2241                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2242
2243         return ret;
2244 }
2245
2246 /* This function resolves the slave port ids after all the other pdevs and
2247  * vdevs have been allocated */
2248 static int
2249 bond_ethdev_configure(struct rte_eth_dev *dev)
2250 {
2251         char *name = dev->data->name;
2252         struct bond_dev_private *internals = dev->data->dev_private;
2253         struct rte_kvargs *kvlist = internals->kvlist;
2254         int arg_count;
2255         uint8_t port_id = dev - rte_eth_devices;
2256
2257         static const uint8_t default_rss_key[40] = {
2258                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2259                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2260                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2261                 0xBE, 0xAC, 0x01, 0xFA
2262         };
2263
2264         unsigned i, j;
2265
2266         /* If RSS is enabled, fill table and key with default values */
2267         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2268                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2269                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2270                 memcpy(internals->rss_key, default_rss_key, 40);
2271
2272                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2273                         internals->reta_conf[i].mask = ~0LL;
2274                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2275                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2276                 }
2277         }
2278
2279         /*
2280          * if no kvlist, it means that this bonded device has been created
2281          * through the bonding api.
2282          */
2283         if (!kvlist)
2284                 return 0;
2285
2286         /* Parse MAC address for bonded device */
2287         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2288         if (arg_count == 1) {
2289                 struct ether_addr bond_mac;
2290
2291                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2292                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2293                         RTE_LOG(INFO, EAL, "Invalid MAC address for bonded device %s\n",
2294                                         name);
2295                         return -1;
2296                 }
2297
2298                 /* Set MAC address */
2299                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2300                         RTE_LOG(ERR, EAL,
2301                                         "Failed to set MAC address on bonded device %s\n",
2302                                         name);
2303                         return -1;
2304                 }
2305         } else if (arg_count > 1) {
2306                 RTE_LOG(ERR, EAL,
2307                                 "MAC address can be specified only once for bonded device %s\n",
2308                                 name);
2309                 return -1;
2310         }
2311
2312         /* Parse/set balance mode transmit policy */
2313         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2314         if (arg_count == 1) {
2315                 uint8_t xmit_policy;
2316
2317                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2318                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2319                                                 0) {
2320                         RTE_LOG(INFO, EAL,
2321                                         "Invalid xmit policy specified for bonded device %s\n",
2322                                         name);
2323                         return -1;
2324                 }
2325
2326                 /* Set balance mode transmit policy */
2327                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2328                         RTE_LOG(ERR, EAL,
2329                                         "Failed to set balance xmit policy on bonded device %s\n",
2330                                         name);
2331                         return -1;
2332                 }
2333         } else if (arg_count > 1) {
2334                 RTE_LOG(ERR, EAL,
2335                                 "Transmit policy can be specified only once for bonded device"
2336                                 " %s\n", name);
2337                 return -1;
2338         }
2339
2340         /* Parse/add slave ports to bonded device */
2341         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2342                 struct bond_ethdev_slave_ports slave_ports;
2343                 unsigned i;
2344
2345                 memset(&slave_ports, 0, sizeof(slave_ports));
2346
2347                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2348                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2349                         RTE_LOG(ERR, EAL,
2350                                         "Failed to parse slave ports for bonded device %s\n",
2351                                         name);
2352                         return -1;
2353                 }
2354
2355                 for (i = 0; i < slave_ports.slave_count; i++) {
2356                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2357                                 RTE_LOG(ERR, EAL,
2358                                                 "Failed to add port %d as slave to bonded device %s\n",
2359                                                 slave_ports.slaves[i], name);
2360                         }
2361                 }
2362
2363         } else {
2364                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2365                 return -1;
2366         }
2367
2368         /* Parse/set primary slave port id*/
2369         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2370         if (arg_count == 1) {
2371                 uint8_t primary_slave_port_id;
2372
2373                 if (rte_kvargs_process(kvlist,
2374                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2375                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2376                                 &primary_slave_port_id) < 0) {
2377                         RTE_LOG(INFO, EAL,
2378                                         "Invalid primary slave port id specified for bonded device"
2379                                         " %s\n", name);
2380                         return -1;
2381                 }
2382
2383                 /* Set the primary slave port */
2384                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2385                                 != 0) {
2386                         RTE_LOG(ERR, EAL,
2387                                         "Failed to set primary slave port %d on bonded device %s\n",
2388                                         primary_slave_port_id, name);
2389                         return -1;
2390                 }
2391         } else if (arg_count > 1) {
2392                 RTE_LOG(INFO, EAL,
2393                                 "Primary slave can be specified only once for bonded device"
2394                                 " %s\n", name);
2395                 return -1;
2396         }
2397
2398         /* Parse link status monitor polling interval */
2399         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2400         if (arg_count == 1) {
2401                 uint32_t lsc_poll_interval_ms;
2402
2403                 if (rte_kvargs_process(kvlist,
2404                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2405                                 &bond_ethdev_parse_time_ms_kvarg,
2406                                 &lsc_poll_interval_ms) < 0) {
2407                         RTE_LOG(INFO, EAL,
2408                                         "Invalid lsc polling interval value specified for bonded"
2409                                         " device %s\n", name);
2410                         return -1;
2411                 }
2412
2413                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2414                                 != 0) {
2415                         RTE_LOG(ERR, EAL,
2416                                         "Failed to set lsc monitor polling interval (%u ms) on"
2417                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2418                         return -1;
2419                 }
2420         } else if (arg_count > 1) {
2421                 RTE_LOG(INFO, EAL,
2422                                 "LSC polling interval can be specified only once for bonded"
2423                                 " device %s\n", name);
2424                 return -1;
2425         }
2426
2427         /* Parse link up interrupt propagation delay */
2428         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2429         if (arg_count == 1) {
2430                 uint32_t link_up_delay_ms;
2431
2432                 if (rte_kvargs_process(kvlist,
2433                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2434                                 &bond_ethdev_parse_time_ms_kvarg,
2435                                 &link_up_delay_ms) < 0) {
2436                         RTE_LOG(INFO, EAL,
2437                                         "Invalid link up propagation delay value specified for"
2438                                         " bonded device %s\n", name);
2439                         return -1;
2440                 }
2441
2442                 /* Set link up propagation delay */
2443                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2444                                 != 0) {
2445                         RTE_LOG(ERR, EAL,
2446                                         "Failed to set link up propagation delay (%u ms) on bonded"
2447                                         " device %s\n", link_up_delay_ms, name);
2448                         return -1;
2449                 }
2450         } else if (arg_count > 1) {
2451                 RTE_LOG(INFO, EAL,
2452                                 "Link up propagation delay can be specified only once for"
2453                                 " bonded device %s\n", name);
2454                 return -1;
2455         }
2456
2457         /* Parse link down interrupt propagation delay */
2458         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2459         if (arg_count == 1) {
2460                 uint32_t link_down_delay_ms;
2461
2462                 if (rte_kvargs_process(kvlist,
2463                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2464                                 &bond_ethdev_parse_time_ms_kvarg,
2465                                 &link_down_delay_ms) < 0) {
2466                         RTE_LOG(INFO, EAL,
2467                                         "Invalid link down propagation delay value specified for"
2468                                         " bonded device %s\n", name);
2469                         return -1;
2470                 }
2471
2472                 /* Set link down propagation delay */
2473                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2474                                 != 0) {
2475                         RTE_LOG(ERR, EAL,
2476                                         "Failed to set link down propagation delay (%u ms) on"
2477                                         " bonded device %s\n", link_down_delay_ms, name);
2478                         return -1;
2479                 }
2480         } else if (arg_count > 1) {
2481                 RTE_LOG(INFO, EAL,
2482                                 "Link down propagation delay can be specified only once for"
2483                                 " bonded device %s\n", name);
2484                 return -1;
2485         }
2486
2487         return 0;
2488 }
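/*
 * Combining bond_init() with the optional kvargs handled above, a fully
 * specified device could look like the following (illustrative values;
 * the --vdev argument is a single line in practice, wrapped here for
 * readability):
 *
 *	--vdev 'eth_bond0,mode=1,slave=0000:04:00.0,slave=0000:04:00.1,
 *		primary=0000:04:00.0,mac=00:1e:67:1d:fd:1d,
 *		lsc_poll_period_ms=100,up_delay=10,down_delay=50'
 */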
2489
2490 static struct rte_driver bond_drv = {
2491         .name = "eth_bond",
2492         .type = PMD_VDEV,
2493         .init = bond_init,
2494         .uninit = bond_uninit,
2495 };
2496
2497 PMD_REGISTER_DRIVER(bond_drv);