bonding: fix freeing with no queue
drivers/net/bonding/rte_eth_bond_pmd.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

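/*
 * Illustrative sketch (assumed helper, not part of the driver): reading the
 * inner EtherType of a frame with get_vlan_offset(), which skips up to two
 * stacked VLAN tags and advances *proto as it goes.
 */
__attribute__((unused)) static inline uint16_t
example_inner_ethertype(struct rte_mbuf *m)
{
        struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
        uint16_t proto = eth->ether_type;       /* big-endian on the wire */

        (void)get_vlan_offset(eth, &proto);     /* moves proto past VLAN tags */
        return rte_be_to_cpu_16(proto);         /* inner type in host order */
}
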
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint8_t slaves[RTE_MAX_ETHPORTS];
        uint8_t slave_count;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        /* Remove the packet from the array if it is a slow packet,
                         * the slave is not in collecting state, or the bonding
                         * interface is not in promiscuous mode and the destination
                         * address does not match. */
                        if (unlikely(hdr->ether_type == ether_type_slow_be ||
                                !collecting || (!promisc &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
                                                bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
        }

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif

#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint8_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails, move the packets to the end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

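/*
 * Caller-side sketch (assumed names bond_port_id/bufs/nb_pkts): on a partial
 * burst the un-sent mbufs are compacted to the tail of the caller's bufs[]
 * array, so bufs[ret .. nb_pkts-1] may be retried or freed:
 *
 *      uint16_t sent = rte_eth_tx_burst(bond_port_id, 0, bufs, nb_pkts);
 *      uint16_t k;
 *
 *      for (k = sent; k < nb_pkts; k++)
 *              rte_pktmbuf_free(bufs[k]);
 */
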
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                IPV4_IHL_MULTIPLIER;

                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                        ip_hdr_offset);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                        ip_hdr_offset);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

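/*
 * Sketch (assumed names pkt/slaves/num_of_slaves/queue_id): each xmit policy
 * above maps a packet to a slave index in [0, slave_count), and packets of
 * one flow always hash to the same slave, e.g.:
 *
 *      uint16_t idx = xmit_l34_hash(pkt, num_of_slaves);
 *
 *      rte_eth_tx_burst(slaves[idx], queue_id, &pkt, 1);
 */
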
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals)
{
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

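/*
 * Worked example (assumed numbers): for a 10000 Mbps slave,
 * link_bwg = 10000 * 1000000 / 8 = 1.25e9 bytes/s before scaling; it is
 * then scaled by (update_idx + 1) * REORDER_PERIOD_MS, and the remaining
 * bandwidth gap (link_bwg - 1000 * load) is split into an integer quotient
 * and a remainder so bandwidth_cmp() can order the slaves without
 * floating point.
 */
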
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint8_t i, j;

        uint8_t num_of_slaves = internals->active_slave_count;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                        sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave, plus one additional
         * buffer for packets sent with the TLB policy. In the worst case,
         * every packet is sent on a single port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint8_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to ALB */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change the src MAC in the eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add the packet to the slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If the packet is not ARP, send it with the TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected clients' ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate a new packet to send the ARP update on the
                                 * current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add the packet to the update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using the TLB policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
                num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails, move the packets to the end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
        /* positions in slaves[], not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate room for additional slow-protocol packets in 802.3ad mode. */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total number of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate each slave's mbuf array with the packets to be sent
                 * on it */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Use only slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails, drop the slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails, move the packets to the end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* Record the value and slave index for the slave which transmits
                 * the maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* If slaves fail to transmit packets from the burst, the calling
         * application is not expected to know about multiple references to
         * packets, so we must handle failures of all packets except those of
         * the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

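/*
 * Refcount sketch (assumed names m/n_slaves): fanning one mbuf out to
 * n_slaves relies on raising its reference count by n_slaves - 1 first, so
 * each slave's eventual free releases only one reference:
 *
 *      rte_mbuf_refcnt_update(m, n_slaves - 1);
 *      ...
 *      rte_pktmbuf_free(m);    // drops a single reference
 */
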
void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link)
{
        struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (slave_dev_link->link_status &&
                bonded_eth_dev->data->dev_started) {
                bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
                bonded_dev_link->link_speed = slave_dev_link->link_speed;

                internals->link_props_set = 1;
        }
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        memset(&(bonded_eth_dev->data->dev_link), 0,
                        sizeof(bonded_eth_dev->data->dev_link));

        internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
                struct rte_eth_link *slave_dev_link)
{
        if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
                bonded_dev_link->link_speed != slave_dev_link->link_speed)
                return -1;

        return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC differs from the current MAC, update it */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (mac_address_set(&rte_eth_devices[internals->primary_port],
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (mac_address_set(
                                                &rte_eth_devices[internals->slaves[i].port_id],
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                RTE_LOG(WARNING, PMD,
                                "Using mode 4, it is necessary to do TX burst and RX burst "
                                "at least every 100ms.\n");
                break;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)
                        return -1;

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}

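/*
 * Usage sketch (application side, assumed names): the public bonding API
 * below ends up selecting the burst functions in bond_ethdev_mode_set():
 *
 *      int port = rte_eth_bond_create("bond0", BONDING_MODE_BALANCE, 0);
 *
 *      if (port >= 0)
 *              rte_eth_bond_xmit_policy_set(port, BALANCE_XMIT_POLICY_LAYER34);
 */
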
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
{
        struct bond_rx_queue *bd_rx_q;
        struct bond_tx_queue *bd_tx_q;

        int errval;
        uint16_t q_id;

        /* Stop slave */
        rte_eth_dev_stop(slave_eth_dev->data->port_id);

        /* Enable interrupts on slave device if supported */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

        /* If RSS is enabled for bonding, try to enable it for slaves */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
                if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
                                != 0) {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
                                        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
                                        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
                } else {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                }

                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
                slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
        }

        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->nb_rx_queues,
                        bonded_eth_dev->data->nb_tx_queues,
                        &(slave_eth_dev->data->dev_conf));
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return errval;
        }

        /* Setup Rx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_rx_q->nb_rx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Setup Tx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_tx_q->nb_tx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &bd_tx_q->tx_conf);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Start device */
        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return -1;
        }

        /* If RSS is enabled for bonding, synchronize the RETA */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
                int i;
                struct bond_dev_private *internals;

                internals = bonded_eth_dev->data->dev_private;

                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
                                errval = rte_eth_dev_rss_reta_update(
                                                slave_eth_dev->data->port_id,
                                                &internals->reta_conf[0],
                                                internals->slaves[i].reta_size);
                                if (errval != 0) {
                                        RTE_LOG(WARNING, PMD,
                                                        "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
                                                        " RSS Configuration for bonding may be inconsistent.\n",
                                                        slave_eth_dev->data->port_id, errval);
                                }
                                break;
                        }
                }
        }

        /* If the lsc interrupt is set, check the slave's initial link status */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1408                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1409
1410         return 0;
1411 }
1412
1413 void
1414 slave_remove(struct bond_dev_private *internals,
1415                 struct rte_eth_dev *slave_eth_dev)
1416 {
1417         uint8_t i;
1418
1419         for (i = 0; i < internals->slave_count; i++)
1420                 if (internals->slaves[i].port_id ==
1421                                 slave_eth_dev->data->port_id)
1422                         break;
1423
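        /* Shift the remaining slaves down to keep the array contiguous */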
1424         if (i < (internals->slave_count - 1))
1425                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1426                                 sizeof(internals->slaves[0]) *
1427                                 (internals->slave_count - i - 1));
1428
1429         internals->slave_count--;
1430 }
1431
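/* Forward declaration; slave_add() below uses it to arm the polling alarm */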
1432 static void
1433 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1434
1435 void
1436 slave_add(struct bond_dev_private *internals,
1437                 struct rte_eth_dev *slave_eth_dev)
1438 {
1439         struct bond_slave_details *slave_details =
1440                         &internals->slaves[internals->slave_count];
1441
1442         slave_details->port_id = slave_eth_dev->data->port_id;
1443         slave_details->last_link_status = 0;
1444
1445         /* If slave device doesn't support interrupts then we need to enable
1446          * polling to monitor link status */
1447         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1448                 slave_details->link_status_poll_enabled = 1;
1449
1450                 if (!internals->link_status_polling_enabled) {
1451                         internals->link_status_polling_enabled = 1;
1452
1453                         rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1454                                         bond_ethdev_slave_link_status_change_monitor,
1455                                         (void *)&rte_eth_devices[internals->port_id]);
1456                 }
1457         }
1458
1459         slave_details->link_status_wait_to_complete = 0;
1460         /* Persist the slave's MAC address so the original can be restored later */
1461         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1462                         sizeof(struct ether_addr));
1463 }
1464
1465 void
1466 bond_ethdev_primary_set(struct bond_dev_private *internals,
1467                 uint8_t slave_port_id)
1468 {
1469         int i;
1470
1471         if (internals->active_slave_count < 1)
1472                 internals->current_primary_port = slave_port_id;
1473         else
1474                 /* Search bonded device slave ports for new proposed primary port */
1475                 for (i = 0; i < internals->active_slave_count; i++) {
1476                         if (internals->active_slaves[i] == slave_port_id)
1477                                 internals->current_primary_port = slave_port_id;
1478                 }
1479 }
1480
1481 static void
1482 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1483
1484 static int
1485 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1486 {
1487         struct bond_dev_private *internals;
1488         int i;
1489
1490         /* slave eth dev will be started by bonded device */
1491         if (check_for_bonded_ethdev(eth_dev)) {
1492                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1493                                 eth_dev->data->port_id);
1494                 return -1;
1495         }
1496
1497         eth_dev->data->dev_link.link_status = 0;
1498         eth_dev->data->dev_started = 1;
1499
1500         internals = eth_dev->data->dev_private;
1501
1502         if (internals->slave_count == 0) {
1503                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1504                 return -1;
1505         }
1506
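        /* No user-defined MAC: inherit the primary slave's persisted MAC address */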
1507         if (internals->user_defined_mac == 0) {
1508                 struct ether_addr *new_mac_addr = NULL;
1509
1510                 for (i = 0; i < internals->slave_count; i++)
1511                         if (internals->slaves[i].port_id == internals->primary_port)
1512                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1513
1514                 if (new_mac_addr == NULL)
1515                         return -1;
1516
1517                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1518                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1519                                         eth_dev->data->port_id);
1520                         return -1;
1521                 }
1522         }
1523
1524         /* Update all slave devices' MACs */
1525         if (mac_address_slaves_update(eth_dev) != 0)
1526                 return -1;
1527
1528         /* If bonded device is configured in promiscuous mode then re-apply config */
1529         if (internals->promiscuous_en)
1530                 bond_ethdev_promiscuous_enable(eth_dev);
1531
1532         /* Reconfigure each slave device if starting bonded device */
1533         for (i = 0; i < internals->slave_count; i++) {
1534                 if (slave_configure(eth_dev,
1535                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1536                         RTE_BOND_LOG(ERR,
1537                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1538                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1539                         return -1;
1540                 }
1541         }
1542
1543         if (internals->user_defined_primary_port)
1544                 bond_ethdev_primary_set(internals, internals->primary_port);
1545
1546         if (internals->mode == BONDING_MODE_8023AD)
1547                 bond_mode_8023ad_start(eth_dev);
1548
1549         if (internals->mode == BONDING_MODE_TLB ||
1550                         internals->mode == BONDING_MODE_ALB)
1551                 bond_tlb_enable(internals);
1552
1553         return 0;
1554 }
1555
1556 static void
1557 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1558 {
1559         uint8_t i;
1560
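        /* The queue arrays may be NULL if the device is closed before any
         * queues were configured, so guard against that before iterating */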
1561         if (dev->data->rx_queues != NULL) {
1562                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1563                         rte_free(dev->data->rx_queues[i]);
1564                         dev->data->rx_queues[i] = NULL;
1565                 }
1566                 dev->data->nb_rx_queues = 0;
1567         }
1568
1569         if (dev->data->tx_queues != NULL) {
1570                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1571                         rte_free(dev->data->tx_queues[i]);
1572                         dev->data->tx_queues[i] = NULL;
1573                 }
1574                 dev->data->nb_tx_queues = 0;
1575         }
1576 }
1577
1578 void
1579 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1580 {
1581         struct bond_dev_private *internals = eth_dev->data->dev_private;
1582         uint8_t i;
1583
1584         if (internals->mode == BONDING_MODE_8023AD) {
1585                 struct port *port;
1586                 void *pkt = NULL;
1587
1588                 bond_mode_8023ad_stop(eth_dev);
1589
1590                 /* Discard all messages to/from mode 4 state machines */
1591                 for (i = 0; i < internals->active_slave_count; i++) {
1592                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1593
1594                         RTE_VERIFY(port->rx_ring != NULL);
1595                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1596                                 rte_pktmbuf_free(pkt);
1597
1598                         RTE_VERIFY(port->tx_ring != NULL);
1599                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1600                                 rte_pktmbuf_free(pkt);
1601                 }
1602         }
1603
1604         if (internals->mode == BONDING_MODE_TLB ||
1605                         internals->mode == BONDING_MODE_ALB) {
1606                 bond_tlb_disable(internals);
1607                 for (i = 0; i < internals->active_slave_count; i++)
1608                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1609         }
1610
1611         internals->active_slave_count = 0;
1612         internals->link_status_polling_enabled = 0;
1613
1614         eth_dev->data->dev_link.link_status = 0;
1615         eth_dev->data->dev_started = 0;
1616 }
1617
1618 void
1619 bond_ethdev_close(struct rte_eth_dev *dev)
1620 {
1621         bond_ethdev_free_queues(dev);
1622 }
1623
1624 /* forward declaration */
1625 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1626
1627 static void
1628 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1629 {
1630         struct bond_dev_private *internals = dev->data->dev_private;
1631
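        /* These limits are fixed PMD defaults, not derived from the slaves */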
1632         dev_info->max_mac_addrs = 1;
1633
1634         dev_info->max_rx_pktlen = (uint32_t)2048;
1635
1636         dev_info->max_rx_queues = (uint16_t)128;
1637         dev_info->max_tx_queues = (uint16_t)512;
1638
1639         dev_info->min_rx_bufsize = 0;
1640         dev_info->pci_dev = NULL;
1641
1642         dev_info->rx_offload_capa = internals->rx_offload_capa;
1643         dev_info->tx_offload_capa = internals->tx_offload_capa;
1644         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1645
1646         dev_info->reta_size = internals->reta_size;
1647 }
1648
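/*
 * Queue setup on the bonded device only records the requested configuration;
 * the actual hardware queues are created per slave in slave_configure().
 */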
1649 static int
1650 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1651                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1652                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1653 {
1654         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1655                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1656                                         0, dev->data->numa_node);
1657         if (bd_rx_q == NULL)
1658                 return -1;
1659
1660         bd_rx_q->queue_id = rx_queue_id;
1661         bd_rx_q->dev_private = dev->data->dev_private;
1662
1663         bd_rx_q->nb_rx_desc = nb_rx_desc;
1664
1665         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1666         bd_rx_q->mb_pool = mb_pool;
1667
1668         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1669
1670         return 0;
1671 }
1672
1673 static int
1674 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1675                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1676                 const struct rte_eth_txconf *tx_conf)
1677 {
1678         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1679                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1680                                         0, dev->data->numa_node);
1681
1682         if (bd_tx_q == NULL)
1683                 return -1;
1684
1685         bd_tx_q->queue_id = tx_queue_id;
1686         bd_tx_q->dev_private = dev->data->dev_private;
1687
1688         bd_tx_q->nb_tx_desc = nb_tx_desc;
1689         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1690
1691         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1692
1693         return 0;
1694 }
1695
1696 static void
1697 bond_ethdev_rx_queue_release(void *queue)
1698 {
1699         if (queue == NULL)
1700                 return;
1701
1702         rte_free(queue);
1703 }
1704
1705 static void
1706 bond_ethdev_tx_queue_release(void *queue)
1707 {
1708         if (queue == NULL)
1709                 return;
1710
1711         rte_free(queue);
1712 }
1713
1714 static void
1715 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1716 {
1717         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1718         struct bond_dev_private *internals;
1719
1720         /* polling_slave_found defaults to true, as we don't want to disable the
1721          * polling alarm if we cannot get the lock */
1722         int i, polling_slave_found = 1;
1723
1724         if (cb_arg == NULL)
1725                 return;
1726
1727         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1728         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1729
1730         if (!bonded_ethdev->data->dev_started ||
1731                 !internals->link_status_polling_enabled)
1732                 return;
1733
1734         /* If the device is currently being configured, don't check the slaves'
1735          * link status; wait until the next period */
1736         if (rte_spinlock_trylock(&internals->lock)) {
1737                 if (internals->slave_count > 0)
1738                         polling_slave_found = 0;
1739
1740                 for (i = 0; i < internals->slave_count; i++) {
1741                         if (!internals->slaves[i].link_status_poll_enabled)
1742                                 continue;
1743
1744                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1745                         polling_slave_found = 1;
1746
1747                         /* Update slave link status */
1748                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1749                                         internals->slaves[i].link_status_wait_to_complete);
1750
1751                         /* if link status has changed since last checked then call lsc
1752                          * event callback */
1753                         if (slave_ethdev->data->dev_link.link_status !=
1754                                         internals->slaves[i].last_link_status) {
1755                                 internals->slaves[i].last_link_status =
1756                                                 slave_ethdev->data->dev_link.link_status;
1757
1758                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1759                                                 RTE_ETH_EVENT_INTR_LSC,
1760                                                 &bonded_ethdev->data->port_id);
1761                         }
1762                 }
1763                 rte_spinlock_unlock(&internals->lock);
1764         }
1765
1766         if (polling_slave_found)
1767                 /* Set alarm to continue monitoring link status of slave ethdevs */
1768                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1769                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1770 }
1771
1772 static int
1773 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1774                 int wait_to_complete)
1775 {
1776         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1777
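        /* The bonded port is up if at least one active slave reports link up */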
1778         if (!bonded_eth_dev->data->dev_started ||
1779                 internals->active_slave_count == 0) {
1780                 bonded_eth_dev->data->dev_link.link_status = 0;
1781                 return 0;
1782         } else {
1783                 struct rte_eth_dev *slave_eth_dev;
1784                 int i, link_up = 0;
1785
1786                 for (i = 0; i < internals->active_slave_count; i++) {
1787                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1788
1789                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1790                                         wait_to_complete);
1791                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1792                                 link_up = 1;
1793                                 break;
1794                         }
1795                 }
1796
1797                 bonded_eth_dev->data->dev_link.link_status = link_up;
1798         }
1799
1800         return 0;
1801 }
1802
1803 static void
1804 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1805 {
1806         struct bond_dev_private *internals = dev->data->dev_private;
1807         struct rte_eth_stats slave_stats;
1808         int i, j;
1809
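        /* Sum each slave's statistics into the bonded device's counters */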
1810         for (i = 0; i < internals->slave_count; i++) {
1811                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1812
1813                 stats->ipackets += slave_stats.ipackets;
1814                 stats->opackets += slave_stats.opackets;
1815                 stats->ibytes += slave_stats.ibytes;
1816                 stats->obytes += slave_stats.obytes;
1817                 stats->ierrors += slave_stats.ierrors;
1818                 stats->oerrors += slave_stats.oerrors;
1819                 stats->imcasts += slave_stats.imcasts;
1820                 stats->rx_nombuf += slave_stats.rx_nombuf;
1821                 stats->fdirmatch += slave_stats.fdirmatch;
1822                 stats->fdirmiss += slave_stats.fdirmiss;
1823                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1824                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1825                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1826                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1827
1828                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1829                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1830                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1831                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1832                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1833                         stats->q_errors[j] += slave_stats.q_errors[j];
1834                 }
1835
1836         }
1837 }
1838
1839 static void
1840 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1841 {
1842         struct bond_dev_private *internals = dev->data->dev_private;
1843         int i;
1844
1845         for (i = 0; i < internals->slave_count; i++)
1846                 rte_eth_stats_reset(internals->slaves[i].port_id);
1847 }
1848
1849 static void
1850 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1851 {
1852         struct bond_dev_private *internals = eth_dev->data->dev_private;
1853         int i;
1854
1855         internals->promiscuous_en = 1;
1856
1857         switch (internals->mode) {
1858         /* Promiscuous mode is propagated to all slaves */
1859         case BONDING_MODE_ROUND_ROBIN:
1860         case BONDING_MODE_BALANCE:
1861         case BONDING_MODE_BROADCAST:
1862                 for (i = 0; i < internals->slave_count; i++)
1863                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1864                 break;
1865         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1866         case BONDING_MODE_8023AD:
1867                 break;
1868         /* Promiscuous mode is propagated only to primary slave */
1869         case BONDING_MODE_ACTIVE_BACKUP:
1870         case BONDING_MODE_TLB:
1871         case BONDING_MODE_ALB:
1872         default:
1873                 rte_eth_promiscuous_enable(internals->current_primary_port);
1874         }
1875 }
1876
1877 static void
1878 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1879 {
1880         struct bond_dev_private *internals = dev->data->dev_private;
1881         int i;
1882
1883         internals->promiscuous_en = 0;
1884
1885         switch (internals->mode) {
1886         /* Promiscuous mode is propagated to all slaves */
1887         case BONDING_MODE_ROUND_ROBIN:
1888         case BONDING_MODE_BALANCE:
1889         case BONDING_MODE_BROADCAST:
1890                 for (i = 0; i < internals->slave_count; i++)
1891                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1892                 break;
1893         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1894         case BONDING_MODE_8023AD:
1895                 break;
1896         /* Promiscuous mode is propagated only to primary slave */
1897         case BONDING_MODE_ACTIVE_BACKUP:
1898         case BONDING_MODE_TLB:
1899         case BONDING_MODE_ALB:
1900         default:
1901                 rte_eth_promiscuous_disable(internals->current_primary_port);
1902         }
1903 }
1904
1905 static void
1906 bond_ethdev_delayed_lsc_propagation(void *arg)
1907 {
1908         if (arg == NULL)
1909                 return;
1910
1911         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1912                         RTE_ETH_EVENT_INTR_LSC);
1913 }
1914
1915 void
1916 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1917                 void *param)
1918 {
1919         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1920         struct bond_dev_private *internals;
1921         struct rte_eth_link link;
1922
1923         int i, valid_slave = 0;
1924         uint8_t active_pos;
1925         uint8_t lsc_flag = 0;
1926
1927         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1928                 return;
1929
1930         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1931         slave_eth_dev = &rte_eth_devices[port_id];
1932
1933         if (check_for_bonded_ethdev(bonded_eth_dev))
1934                 return;
1935
1936         internals = bonded_eth_dev->data->dev_private;
1937
1938         /* If the device isn't started, don't handle interrupts */
1939         if (!bonded_eth_dev->data->dev_started)
1940                 return;
1941
1942         /* verify that port_id is a valid slave of bonded port */
1943         for (i = 0; i < internals->slave_count; i++) {
1944                 if (internals->slaves[i].port_id == port_id) {
1945                         valid_slave = 1;
1946                         break;
1947                 }
1948         }
1949
1950         if (!valid_slave)
1951                 return;
1952
1953         /* Search for port in active port list */
1954         active_pos = find_slave_by_id(internals->active_slaves,
1955                         internals->active_slave_count, port_id);
1956
1957         rte_eth_link_get_nowait(port_id, &link);
1958         if (link.link_status) {
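                /* Slave is already in the active list; nothing more to do */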
1959                 if (active_pos < internals->active_slave_count)
1960                         return;
1961
1962                 /* if no active slave ports then set this port to be primary port */
1963                 if (internals->active_slave_count < 1) {
1964                         /* If first active slave, then change link status */
1965                         bonded_eth_dev->data->dev_link.link_status = 1;
1966                         internals->current_primary_port = port_id;
1967                         lsc_flag = 1;
1968
1969                         mac_address_slaves_update(bonded_eth_dev);
1970
1971                         /* Inherit eth dev link properties from first active slave */
1972                         link_properties_set(bonded_eth_dev,
1973                                         &(slave_eth_dev->data->dev_link));
1974                 }
1975
1976                 activate_slave(bonded_eth_dev, port_id);
1977
1978                 /* If user has defined the primary port then default to using it */
1979                 if (internals->user_defined_primary_port &&
1980                                 internals->primary_port == port_id)
1981                         bond_ethdev_primary_set(internals, port_id);
1982         } else {
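                /* Slave is not in the active list; nothing to deactivate */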
1983                 if (active_pos == internals->active_slave_count)
1984                         return;
1985
1986                 /* Remove from active slave list */
1987                 deactivate_slave(bonded_eth_dev, port_id);
1988
1989                 /* No active slaves, change link status to down and reset other
1990                  * link properties */
1991                 if (internals->active_slave_count < 1) {
1992                         lsc_flag = 1;
1993                         bonded_eth_dev->data->dev_link.link_status = 0;
1994
1995                         link_properties_reset(bonded_eth_dev);
1996                 }
1997
1998         /* Update primary id: take the first active slave from the list, or fall
1999          * back to the configured primary port if none are available */
2000                 if (port_id == internals->current_primary_port) {
2001                         if (internals->active_slave_count > 0)
2002                                 bond_ethdev_primary_set(internals,
2003                                                 internals->active_slaves[0]);
2004                         else
2005                                 internals->current_primary_port = internals->primary_port;
2006                 }
2007         }
2008
2009         if (lsc_flag) {
2010                 /* Cancel any possible outstanding interrupts if delays are enabled */
2011                 if (internals->link_up_delay_ms > 0 ||
2012                         internals->link_down_delay_ms > 0)
2013                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2014                                         bonded_eth_dev);
2015
2016                 if (bonded_eth_dev->data->dev_link.link_status) {
2017                         if (internals->link_up_delay_ms > 0)
2018                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2019                                                 bond_ethdev_delayed_lsc_propagation,
2020                                                 (void *)bonded_eth_dev);
2021                         else
2022                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2023                                                 RTE_ETH_EVENT_INTR_LSC);
2024
2025                 } else {
2026                         if (internals->link_down_delay_ms > 0)
2027                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2028                                                 bond_ethdev_delayed_lsc_propagation,
2029                                                 (void *)bonded_eth_dev);
2030                         else
2031                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2032                                                 RTE_ETH_EVENT_INTR_LSC);
2033                 }
2034         }
2035 }
2036
2037 static int
2038 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2039                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2040 {
2041         unsigned i, j;
2042         int result = 0;
2043         int slave_reta_size;
2044         unsigned reta_count;
2045         struct bond_dev_private *internals = dev->data->dev_private;
2046
2047         if (reta_size != internals->reta_size)
2048                 return -EINVAL;
2049
2050         /* Copy RETA table */
2051         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2052
2053         for (i = 0; i < reta_count; i++) {
2054                 internals->reta_conf[i].mask = reta_conf[i].mask;
2055                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2056                         if ((reta_conf[i].mask >> j) & 0x01)
2057                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2058         }
2059
2060         /* Replicate the first reta_count groups to fill the rest of the array */
2061         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2062                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2063                                 sizeof(internals->reta_conf[0]) * reta_count);
2064
2065         /* Propagate RETA over slaves */
2066         for (i = 0; i < internals->slave_count; i++) {
2067                 slave_reta_size = internals->slaves[i].reta_size;
2068                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2069                                 &internals->reta_conf[0], slave_reta_size);
2070                 if (result < 0)
2071                         return result;
2072         }
2073
2074         return 0;
2075 }
2076
2077 static int
2078 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2079                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2080 {
2081         int i, j;
2082         struct bond_dev_private *internals = dev->data->dev_private;
2083
2084         if (reta_size != internals->reta_size)
2085                 return -EINVAL;
2086
2087         /* Copy RETA table */
2088         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2089                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2090                         if ((reta_conf[i].mask >> j) & 0x01)
2091                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2092
2093         return 0;
2094 }
2095
2096 static int
2097 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2098                 struct rte_eth_rss_conf *rss_conf)
2099 {
2100         int i, result = 0;
2101         struct bond_dev_private *internals = dev->data->dev_private;
2102         struct rte_eth_rss_conf bond_rss_conf;
2103
2104         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2105
2106         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2107
2108         if (bond_rss_conf.rss_hf != 0)
2109                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2110
2111         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2112                         sizeof(internals->rss_key)) {
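                /* A zero key length selects the default 40-byte RSS key size */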
2113                 if (bond_rss_conf.rss_key_len == 0)
2114                         bond_rss_conf.rss_key_len = 40;
2115                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2116                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2117                                 internals->rss_key_len);
2118         }
2119
2120         for (i = 0; i < internals->slave_count; i++) {
2121                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2122                                 &bond_rss_conf);
2123                 if (result < 0)
2124                         return result;
2125         }
2126
2127         return 0;
2128 }
2129
2130 static int
2131 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2132                 struct rte_eth_rss_conf *rss_conf)
2133 {
2134         struct bond_dev_private *internals = dev->data->dev_private;
2135
2136         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2137         rss_conf->rss_key_len = internals->rss_key_len;
2138         if (rss_conf->rss_key)
2139                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2140
2141         return 0;
2142 }
2143
2144 struct eth_dev_ops default_dev_ops = {
2145                 .dev_start            = bond_ethdev_start,
2146                 .dev_stop             = bond_ethdev_stop,
2147                 .dev_close            = bond_ethdev_close,
2148                 .dev_configure        = bond_ethdev_configure,
2149                 .dev_infos_get        = bond_ethdev_info,
2150                 .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2151                 .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2152                 .rx_queue_release     = bond_ethdev_rx_queue_release,
2153                 .tx_queue_release     = bond_ethdev_tx_queue_release,
2154                 .link_update          = bond_ethdev_link_update,
2155                 .stats_get            = bond_ethdev_stats_get,
2156                 .stats_reset          = bond_ethdev_stats_reset,
2157                 .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2158                 .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2159                 .reta_update          = bond_ethdev_rss_reta_update,
2160                 .reta_query           = bond_ethdev_rss_reta_query,
2161                 .rss_hash_update      = bond_ethdev_rss_hash_update,
2162                 .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2163 };
2164
2165 static int
2166 bond_init(const char *name, const char *params)
2167 {
2168         struct bond_dev_private *internals;
2169         struct rte_kvargs *kvlist;
2170         uint8_t bonding_mode, socket_id;
2171         int  arg_count, port_id;
2172
2173         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2174
2175         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2176         if (kvlist == NULL)
2177                 return -1;
2178
2179         /* Parse link bonding mode */
2180         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2181                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2182                                 &bond_ethdev_parse_slave_mode_kvarg,
2183                                 &bonding_mode) != 0) {
2184                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2185                                         name);
2186                         goto parse_error;
2187                 }
2188         } else {
2189                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2190                                 "device %s\n", name);
2191                 goto parse_error;
2192         }
2193
2194         /* Parse socket id to create bonding device on */
2195         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2196         if (arg_count == 1) {
2197                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2198                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2199                                 != 0) {
2200                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2201                                         "bonded device %s\n", name);
2202                         goto parse_error;
2203                 }
2204         } else if (arg_count > 1) {
2205                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2206                                 "bonded device %s\n", name);
2207                 goto parse_error;
2208         } else {
2209                 socket_id = rte_socket_id();
2210         }
2211
2212         /* Create link bonding eth device */
2213         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2214         if (port_id < 0) {
2215                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2216                                 "socket %u.\n", name, bonding_mode, socket_id);
2217                 goto parse_error;
2218         }
2219         internals = rte_eth_devices[port_id].data->dev_private;
2220         internals->kvlist = kvlist;
2221
2222         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2223                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2224         return 0;
2225
2226 parse_error:
2227         rte_kvargs_free(kvlist);
2228
2229         return -1;
2230 }
2231
2232 static int
2233 bond_uninit(const char *name)
2234 {
2235         int  ret;
2236
2237         if (name == NULL)
2238                 return -EINVAL;
2239
2240         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2241
2242         /* free link bonding eth device */
2243         ret = rte_eth_bond_free(name);
2244         if (ret < 0)
2245                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2246
2247         return ret;
2248 }
2249
2250 /* this part resolves the slave port ids after all the other pdevs and vdevs
2251  * have been allocated */
2252 static int
2253 bond_ethdev_configure(struct rte_eth_dev *dev)
2254 {
2255         char *name = dev->data->name;
2256         struct bond_dev_private *internals = dev->data->dev_private;
2257         struct rte_kvargs *kvlist = internals->kvlist;
2258         int arg_count;
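        /* Derive the port id from the device's index in rte_eth_devices */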
2259         uint8_t port_id = dev - rte_eth_devices;
2260
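        /* Commonly used default 40-byte RSS hash key */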
2261         static const uint8_t default_rss_key[40] = {
2262                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2263                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2264                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2265                 0xBE, 0xAC, 0x01, 0xFA
2266         };
2267
2268         unsigned i, j;
2269
2270         /* If RSS is enabled, fill table and key with default values */
2271         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2272                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2273                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2274                 memcpy(internals->rss_key, default_rss_key, 40);
2275
2276                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2277                         internals->reta_conf[i].mask = ~0LL;
2278                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2279                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2280                 }
2281         }
2282
2283         /*
2284          * if no kvlist, it means that this bonded device has been created
2285          * through the bonding api.
2286          */
2287         if (!kvlist)
2288                 return 0;
2289
2290         /* Parse MAC address for bonded device */
2291         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2292         if (arg_count == 1) {
2293                 struct ether_addr bond_mac;
2294
2295                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2296                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2297                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2298                                         name);
2299                         return -1;
2300                 }
2301
2302                 /* Set MAC address */
2303                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2304                         RTE_LOG(ERR, EAL,
2305                                         "Failed to set mac address on bonded device %s\n",
2306                                         name);
2307                         return -1;
2308                 }
2309         } else if (arg_count > 1) {
2310                 RTE_LOG(ERR, EAL,
2311                                 "MAC address can be specified only once for bonded device %s\n",
2312                                 name);
2313                 return -1;
2314         }
2315
2316         /* Parse/set balance mode transmit policy */
2317         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2318         if (arg_count == 1) {
2319                 uint8_t xmit_policy;
2320
2321                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2322                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2323                                                 0) {
2324                         RTE_LOG(INFO, EAL,
2325                                         "Invalid xmit policy specified for bonded device %s\n",
2326                                         name);
2327                         return -1;
2328                 }
2329
2330                 /* Set balance mode transmit policy */
2331                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2332                         RTE_LOG(ERR, EAL,
2333                                         "Failed to set balance xmit policy on bonded device %s\n",
2334                                         name);
2335                         return -1;
2336                 }
2337         } else if (arg_count > 1) {
2338                 RTE_LOG(ERR, EAL,
2339                                 "Transmit policy can be specified only once for bonded device"
2340                                 " %s\n", name);
2341                 return -1;
2342         }
2343
2344         /* Parse/add slave ports to bonded device */
2345         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2346                 struct bond_ethdev_slave_ports slave_ports;
2347                 unsigned i;
2348
2349                 memset(&slave_ports, 0, sizeof(slave_ports));
2350
2351                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2352                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2353                         RTE_LOG(ERR, EAL,
2354                                         "Failed to parse slave ports for bonded device %s\n",
2355                                         name);
2356                         return -1;
2357                 }
2358
2359                 for (i = 0; i < slave_ports.slave_count; i++) {
2360                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2361                                 RTE_LOG(ERR, EAL,
2362                                                 "Failed to add port %d as slave to bonded device %s\n",
2363                                                 slave_ports.slaves[i], name);
2364                         }
2365                 }
2366
2367         } else {
2368                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2369                 return -1;
2370         }
2371
2372         /* Parse/set primary slave port id*/
2373         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2374         if (arg_count == 1) {
2375                 uint8_t primary_slave_port_id;
2376
2377                 if (rte_kvargs_process(kvlist,
2378                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2379                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2380                                 &primary_slave_port_id) < 0) {
2381                         RTE_LOG(INFO, EAL,
2382                                         "Invalid primary slave port id specified for bonded device"
2383                                         " %s\n", name);
2384                         return -1;
2385                 }
2386
2387                 /* Set primary slave port id */
2388                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2389                                 != 0) {
2390                         RTE_LOG(ERR, EAL,
2391                                         "Failed to set primary slave port %d on bonded device %s\n",
2392                                         primary_slave_port_id, name);
2393                         return -1;
2394                 }
2395         } else if (arg_count > 1) {
2396                 RTE_LOG(INFO, EAL,
2397                                 "Primary slave can be specified only once for bonded device"
2398                                 " %s\n", name);
2399                 return -1;
2400         }
2401
2402         /* Parse link status monitor polling interval */
2403         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2404         if (arg_count == 1) {
2405                 uint32_t lsc_poll_interval_ms;
2406
2407                 if (rte_kvargs_process(kvlist,
2408                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2409                                 &bond_ethdev_parse_time_ms_kvarg,
2410                                 &lsc_poll_interval_ms) < 0) {
2411                         RTE_LOG(INFO, EAL,
2412                                         "Invalid lsc polling interval value specified for bonded"
2413                                         " device %s\n", name);
2414                         return -1;
2415                 }
2416
2417                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2418                                 != 0) {
2419                         RTE_LOG(ERR, EAL,
2420                                         "Failed to set lsc monitor polling interval (%u ms) on"
2421                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2422                         return -1;
2423                 }
2424         } else if (arg_count > 1) {
2425                 RTE_LOG(INFO, EAL,
2426                                 "LSC polling interval can be specified only once for bonded"
2427                                 " device %s\n", name);
2428                 return -1;
2429         }
2430
2431         /* Parse link up interrupt propagation delay */
2432         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2433         if (arg_count == 1) {
2434                 uint32_t link_up_delay_ms;
2435
2436                 if (rte_kvargs_process(kvlist,
2437                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2438                                 &bond_ethdev_parse_time_ms_kvarg,
2439                                 &link_up_delay_ms) < 0) {
2440                         RTE_LOG(INFO, EAL,
2441                                         "Invalid link up propagation delay value specified for"
2442                                         " bonded device %s\n", name);
2443                         return -1;
2444                 }
2445
2446                 /* Set link up propagation delay */
2447                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2448                                 != 0) {
2449                         RTE_LOG(ERR, EAL,
2450                                         "Failed to set link up propagation delay (%u ms) on bonded"
2451                                         " device %s\n", link_up_delay_ms, name);
2452                         return -1;
2453                 }
2454         } else if (arg_count > 1) {
2455                 RTE_LOG(INFO, EAL,
2456                                 "Link up propagation delay can be specified only once for"
2457                                 " bonded device %s\n", name);
2458                 return -1;
2459         }
2460
2461         /* Parse link down interrupt propagation delay */
2462         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2463         if (arg_count == 1) {
2464                 uint32_t link_down_delay_ms;
2465
2466                 if (rte_kvargs_process(kvlist,
2467                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2468                                 &bond_ethdev_parse_time_ms_kvarg,
2469                                 &link_down_delay_ms) < 0) {
2470                         RTE_LOG(INFO, EAL,
2471                                         "Invalid link down propagation delay value specified for"
2472                                         " bonded device %s\n", name);
2473                         return -1;
2474                 }
2475
2476                 /* Set link down propagation delay */
2477                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2478                                 != 0) {
2479                         RTE_LOG(ERR, EAL,
2480                                         "Failed to set link down propagation delay (%u ms) on"
2481                                         " bonded device %s\n", link_down_delay_ms, name);
2482                         return -1;
2483                 }
2484         } else if (arg_count > 1) {
2485                 RTE_LOG(INFO, EAL,
2486                                 "Link down propagation delay can be specified only once for"
2487                                 " bonded device %s\n", name);
2488                 return -1;
2489         }
2490
2491         return 0;
2492 }
2493
2494 static struct rte_driver bond_drv = {
2495         .name = "eth_bond",
2496         .type = PMD_VDEV,
2497         .init = bond_init,
2498         .uninit = bond_uninit,
2499 };
2500
2501 PMD_REGISTER_DRIVER(bond_drv);