bonding: fix build with icc
drivers/net/bonding/rte_eth_bond_pmd.c (dpdk.git)
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

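/*
 * Return the offset, in bytes, of the L3 header relative to the end of the
 * Ethernet header, accounting for up to two stacked VLAN tags (QinQ).
 * *proto is updated to the inner EtherType.
 */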
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

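/*
 * Basic burst RX (round-robin, balance and broadcast modes): poll each
 * active slave in turn, appending received packets to *bufs, until either
 * the requested burst size is reached or every active slave has been
 * polled once.
 */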
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

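/*
 * Active-backup RX: traffic is only received on the current primary slave.
 */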
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

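/*
 * 802.3ad (mode 4) RX: receive from every active slave, but divert slow
 * protocol (LACP) frames to the mode 4 state machine and drop frames from
 * slaves that are not collecting, as well as unicast frames not addressed
 * to the bond MAC when promiscuous mode is off. Filtered packets are
 * removed from the returned array.
 */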
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint8_t slaves[RTE_MAX_ETHPORTS];
        uint8_t slave_count;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        /* Remove packet from array if it is a slow packet, or if the
                         * slave is not in collecting state, or if the bonding
                         * interface is not in promiscuous mode and the destination
                         * address does not match. */
                        if (unlikely(hdr->ether_type == ether_type_slow_be ||
                                !collecting || (!promisc &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
                                                bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
        }

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint8_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Just update the packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

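/*
 * Adaptive load balancing (mode 6) RX: receive as in the basic burst RX,
 * then feed any ARP packets to the ALB logic so the client table stays up
 * to date.
 */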
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

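/*
 * Round-robin (mode 0) TX: spread the burst evenly across the active
 * slaves, starting one slave past where the previous burst left off.
 * Packets that a slave fails to send are moved to the end of *bufs so the
 * caller can retry them.
 */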
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate slave mbuf arrays with the packets to be sent on each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

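/*
 * XOR-fold hash helpers used by the transmit policies below: each reduces
 * the relevant header fields (MAC, IPv4 or IPv6 addresses) to a single
 * word by XORing them together.
 */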
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

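/*
 * Layer 2+3 transmit policy: combine the MAC hash with a hash of the IPv4
 * or IPv6 addresses (when present) and fold the result down before taking
 * it modulo the slave count.
 */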
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

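/*
 * Layer 3+4 transmit policy: hash the IP addresses together with the
 * TCP/UDP port pair. Note that for IPv6 the L4 header is assumed to follow
 * the fixed IPv6 header directly (extension headers are not walked).
 */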
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                IPV4_IHL_MULTIPLIER;

                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                        ip_hdr_offset);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                        ip_hdr_offset);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals)
{
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

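/*
 * Estimate how much of a slave's link capacity is still unused in the
 * current reorder window: link_bwg is the link capacity in bytes scaled to
 * the window length and load is the byte count already transmitted. The
 * quotient and remainder are kept separately so bandwidth_cmp() can order
 * slaves without losing precision.
 */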
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

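/*
 * Periodic alarm callback for TLB mode: sample per-slave TX byte counters,
 * compute each slave's remaining bandwidth and re-sort
 * internals->tlb_slaves_order so the least loaded slaves are tried first.
 * Re-arms itself every REORDER_PERIOD_MS milliseconds.
 */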
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

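/*
 * TLB (mode 5) TX: walk the slaves in tlb_slaves_order. For each packet
 * whose source MAC is still the primary's address, rewrite it to the MAC
 * of the slave that will actually transmit it, then hand off as much of
 * the remaining burst as that slave will take.
 */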
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint8_t i, j;

        uint8_t num_of_slaves = internals->active_slave_count;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

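/*
 * ALB (mode 6) TX: ARP packets are steered to the slave chosen by the ALB
 * client table (with the source MAC rewritten to that slave's address),
 * pending ARP update packets are generated from the client table when
 * needed, and all other traffic falls back to the TLB policy. Unsent
 * packets are collected at the tail of *bufs.
 */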
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint8_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
                num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
        }

        return num_tx_total;
}

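/*
 * Balance (mode 2) TX: each packet is assigned to a slave by the
 * configured transmit hash policy (l2, l2+3 or l3+4); packets a slave
 * fails to send are moved to the end of *bufs.
 */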
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets to the slave tx buffers */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

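/*
 * 802.3ad (mode 4) TX: first drain each slave's ring of pending slow
 * (LACPDU) packets so they go out ahead of data, then distribute data
 * packets by the transmit hash policy across the slaves that are currently
 * in the DISTRIBUTING state. Slow packets that cannot be sent are dropped;
 * unsent data packets are moved to the end of *bufs.
 */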
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
        /* positions in slaves, not ID */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate room for the additional slow protocol packets of 802.3ad mode */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate slave mbuf arrays with the packets to be sent on each slave */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

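/*
 * Broadcast (mode 3) TX: bump each mbuf's reference count and transmit the
 * whole burst on every active slave. On partial failure, free the extra
 * references for every slave except the one that sent the most, and report
 * that slave's count to the caller.
 */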
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link)
{
        struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (slave_dev_link->link_status &&
                bonded_eth_dev->data->dev_started) {
                bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
                bonded_dev_link->link_speed = slave_dev_link->link_speed;

                internals->link_props_set = 1;
        }
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        memset(&(bonded_eth_dev->data->dev_link), 0,
                        sizeof(bonded_eth_dev->data->dev_link));

        internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
                struct rte_eth_link *slave_dev_link)
{
        if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
                bonded_dev_link->link_speed != slave_dev_link->link_speed)
                return -1;

        return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC is different from the current MAC then update */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

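/*
 * Propagate MAC addresses to the slaves according to the bonding mode:
 * round-robin, balance and broadcast give every slave the bond MAC,
 * 802.3ad delegates to its own helper, and the remaining modes set the
 * bond MAC only on the primary while other slaves keep their persisted
 * addresses.
 */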
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (mac_address_set(&rte_eth_devices[internals->primary_port],
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (mac_address_set(
                                                &rte_eth_devices[internals->slaves[i].port_id],
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

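/*
 * Install the rx/tx burst handlers that implement the requested bonding
 * mode; modes 4 and 6 also need their extra state enabled first.
 */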
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                RTE_LOG(WARNING, PMD,
                                "Using mode 4, it is necessary to do TX burst and RX burst "
                                "at least every 100ms.\n");
                break;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)
                        return -1;

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}

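/*
 * (Re)configure a slave so it mirrors the bonded device: stop it, copy the
 * bond's RSS configuration when RSS is enabled, configure it with the same
 * queue counts, set up each rx/tx queue from the bonded device's queue
 * settings, start it, and finally synchronize the RSS RETA table.
 */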
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
{
        struct bond_rx_queue *bd_rx_q;
        struct bond_tx_queue *bd_tx_q;

        int errval;
        uint16_t q_id;

        /* Stop slave */
        rte_eth_dev_stop(slave_eth_dev->data->port_id);

        /* Enable interrupts on slave device if supported */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

        /* If RSS is enabled for bonding, try to enable it for slaves */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
                if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
                                != 0) {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
                                        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
                                        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
                } else {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                }

                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
                slave_eth_dev->data->dev_conf.rxmode.mq_mode =
                                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
        }

        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->nb_rx_queues,
                        bonded_eth_dev->data->nb_tx_queues,
                        &(slave_eth_dev->data->dev_conf));
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return errval;
        }

        /* Setup Rx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_rx_q->nb_rx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Setup Tx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_tx_q->nb_tx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &bd_tx_q->tx_conf);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Start device */
        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return -1;
        }

        /* If RSS is enabled for bonding, synchronize RETA */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
                int i;
                struct bond_dev_private *internals;

                internals = bonded_eth_dev->data->dev_private;

                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
                                errval = rte_eth_dev_rss_reta_update(
                                                slave_eth_dev->data->port_id,
                                                &internals->reta_conf[0],
                                                internals->slaves[i].reta_size);
                                if (errval != 0) {
                                        RTE_LOG(WARNING, PMD,
                                                        "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
                                                        " RSS Configuration for bonding may be inconsistent.\n",
                                                        slave_eth_dev->data->port_id, errval);
                                }
                                break;
                        }
                }
        }

        /* If lsc interrupt is set, check initial slave's link status */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1408                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1409                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1410
1411         return 0;
1412 }
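
/*
 * The stop/configure/queue-setup/start sequence above mirrors the standard
 * ethdev bring-up an application performs on a standalone port. A minimal
 * sketch, assuming one Rx/Tx queue pair and a pre-created mempool "mb_pool":
 *
 *	struct rte_eth_conf port_conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_NONE },
 *	};
 *
 *	if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) != 0 ||
 *			rte_eth_rx_queue_setup(port_id, 0, 128,
 *				rte_eth_dev_socket_id(port_id), NULL, mb_pool) != 0 ||
 *			rte_eth_tx_queue_setup(port_id, 0, 512,
 *				rte_eth_dev_socket_id(port_id), NULL) != 0 ||
 *			rte_eth_dev_start(port_id) != 0)
 *		rte_exit(EXIT_FAILURE, "Port %u init failed\n", port_id);
 */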
1413
1414 void
1415 slave_remove(struct bond_dev_private *internals,
1416                 struct rte_eth_dev *slave_eth_dev)
1417 {
1418         uint8_t i;
1419
1420         for (i = 0; i < internals->slave_count; i++)
1421                 if (internals->slaves[i].port_id ==
1422                                 slave_eth_dev->data->port_id)
1423                         break;
1424
1425         if (i < (internals->slave_count - 1))
1426                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1427                                 sizeof(internals->slaves[0]) *
1428                                 (internals->slave_count - i - 1));
1429
1430         internals->slave_count--;
1431 }
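
/*
 * Worked example of the memmove() compaction above: removing index 1 from
 * slaves[] = { A, B, C, D } (slave_count == 4) shifts the trailing entries
 * down to give { A, C, D, D }; slave_count then drops to 3, so the stale
 * copy of D at the tail is never referenced again.
 */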
1432
1433 static void
1434 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1435
1436 void
1437 slave_add(struct bond_dev_private *internals,
1438                 struct rte_eth_dev *slave_eth_dev)
1439 {
1440         struct bond_slave_details *slave_details =
1441                         &internals->slaves[internals->slave_count];
1442
1443         slave_details->port_id = slave_eth_dev->data->port_id;
1444         slave_details->last_link_status = 0;
1445
1446         /* If slave device doesn't support interrupts then we need to enable
1447          * polling to monitor link status */
1448         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1449                 slave_details->link_status_poll_enabled = 1;
1450
1451                 if (!internals->link_status_polling_enabled) {
1452                         internals->link_status_polling_enabled = 1;
1453
1454                         rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1455                                         bond_ethdev_slave_link_status_change_monitor,
1456                                         (void *)&rte_eth_devices[internals->port_id]);
1457                 }
1458         }
1459
1460         slave_details->link_status_wait_to_complete = 0;
1461         /* Store slave's current MAC address so it can be restored on removal */
1462         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1463                         sizeof(struct ether_addr));
1464 }
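
/*
 * Usage sketch (illustrative only): slave_add() is reached from the
 * application through the public API; the port id variables are assumptions.
 *
 *	#include <rte_eth_bond.h>
 *
 *	if (rte_eth_bond_slave_add(bond_port_id, slave_port_id) != 0)
 *		rte_exit(EXIT_FAILURE, "Failed to add slave %u\n",
 *				slave_port_id);
 */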
1465
1466 void
1467 bond_ethdev_primary_set(struct bond_dev_private *internals,
1468                 uint8_t slave_port_id)
1469 {
1470         int i;
1471
1472         if (internals->active_slave_count < 1)
1473                 internals->current_primary_port = slave_port_id;
1474         else
1475                 /* Search bonded device slave ports for new proposed primary port */
1476                 for (i = 0; i < internals->active_slave_count; i++) {
1477                         if (internals->active_slaves[i] == slave_port_id)
1478                                 internals->current_primary_port = slave_port_id;
1479                 }
1480 }
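
/*
 * Usage sketch (illustrative only): the primary slave is normally chosen
 * through the public API rather than by calling this helper directly:
 *
 *	if (rte_eth_bond_primary_set(bond_port_id, slave_port_id) != 0)
 *		RTE_LOG(ERR, USER1, "Failed to set primary slave\n");
 */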
1481
1482 static void
1483 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1484
1485 static int
1486 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1487 {
1488         struct bond_dev_private *internals;
1489         int i;
1490
1491         /* slave eth dev will be started by bonded device */
1492         if (check_for_bonded_ethdev(eth_dev)) {
1493                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1494                                 eth_dev->data->port_id);
1495                 return -1;
1496         }
1497
1498         eth_dev->data->dev_link.link_status = 0;
1499         eth_dev->data->dev_started = 1;
1500
1501         internals = eth_dev->data->dev_private;
1502
1503         if (internals->slave_count == 0) {
1504                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1505                 return -1;
1506         }
1507
1508         if (internals->user_defined_mac == 0) {
1509                 struct ether_addr *new_mac_addr = NULL;
1510
1511                 for (i = 0; i < internals->slave_count; i++)
1512                         if (internals->slaves[i].port_id == internals->primary_port)
1513                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1514
1515                 if (new_mac_addr == NULL)
1516                         return -1;
1517
1518                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1519                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1520                                         eth_dev->data->port_id);
1521                         return -1;
1522                 }
1523         }
1524
1525         /* Update all slave devices MACs*/
1526         if (mac_address_slaves_update(eth_dev) != 0)
1527                 return -1;
1528
1529         /* If bonded device is configured in promiscuous mode then re-apply config */
1530         if (internals->promiscuous_en)
1531                 bond_ethdev_promiscuous_enable(eth_dev);
1532
1533         /* Reconfigure each slave device if starting bonded device */
1534         for (i = 0; i < internals->slave_count; i++) {
1535                 if (slave_configure(eth_dev,
1536                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1537                         RTE_BOND_LOG(ERR,
1538                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1539                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1540                         return -1;
1541                 }
1542         }
1543
1544         if (internals->user_defined_primary_port)
1545                 bond_ethdev_primary_set(internals, internals->primary_port);
1546
1547         if (internals->mode == BONDING_MODE_8023AD)
1548                 bond_mode_8023ad_start(eth_dev);
1549
1550         if (internals->mode == BONDING_MODE_TLB ||
1551                         internals->mode == BONDING_MODE_ALB)
1552                 bond_tlb_enable(internals);
1553
1554         return 0;
1555 }
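
/*
 * From the application's point of view the bonded port is started like any
 * other ethdev; the per-slave reconfiguration above happens transparently
 * inside this dev_start callback:
 *
 *	if (rte_eth_dev_start(bond_port_id) != 0)
 *		rte_exit(EXIT_FAILURE, "Failed to start bonded port\n");
 */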
1556
1557 static void
1558 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1559 {
1560         uint8_t i;
1561
1562         if (dev->data->rx_queues != NULL) {
1563                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1564                         rte_free(dev->data->rx_queues[i]);
1565                         dev->data->rx_queues[i] = NULL;
1566                 }
1567                 dev->data->nb_rx_queues = 0;
1568         }
1569
1570         if (dev->data->tx_queues != NULL) {
1571                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1572                         rte_free(dev->data->tx_queues[i]);
1573                         dev->data->tx_queues[i] = NULL;
1574                 }
1575                 dev->data->nb_tx_queues = 0;
1576         }
1577 }
1578
1579 void
1580 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1581 {
1582         struct bond_dev_private *internals = eth_dev->data->dev_private;
1583         uint8_t i;
1584
1585         if (internals->mode == BONDING_MODE_8023AD) {
1586                 struct port *port;
1587                 void *pkt = NULL;
1588
1589                 bond_mode_8023ad_stop(eth_dev);
1590
1591                 /* Discard all messages to/from mode 4 state machines */
1592                 for (i = 0; i < internals->active_slave_count; i++) {
1593                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1594
1595                         RTE_VERIFY(port->rx_ring != NULL);
1596                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1597                                 rte_pktmbuf_free(pkt);
1598
1599                         RTE_VERIFY(port->tx_ring != NULL);
1600                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1601                                 rte_pktmbuf_free(pkt);
1602                 }
1603         }
1604
1605         if (internals->mode == BONDING_MODE_TLB ||
1606                         internals->mode == BONDING_MODE_ALB) {
1607                 bond_tlb_disable(internals);
1608                 for (i = 0; i < internals->active_slave_count; i++)
1609                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1610         }
1611
1612         internals->active_slave_count = 0;
1613         internals->link_status_polling_enabled = 0;
1614
1615         eth_dev->data->dev_link.link_status = 0;
1616         eth_dev->data->dev_started = 0;
1617 }
1618
1619 void
1620 bond_ethdev_close(struct rte_eth_dev *dev)
1621 {
1622         bond_ethdev_free_queues(dev);
1623 }
1624
1625 /* forward declaration */
1626 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1627
1628 static void
1629 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1630 {
1631         struct bond_dev_private *internals = dev->data->dev_private;
1632
1633         dev_info->max_mac_addrs = 1;
1634
1635         dev_info->max_rx_pktlen = (uint32_t)2048;
1636
1637         dev_info->max_rx_queues = (uint16_t)128;
1638         dev_info->max_tx_queues = (uint16_t)512;
1639
1640         dev_info->min_rx_bufsize = 0;
1641         dev_info->pci_dev = NULL;
1642
1643         dev_info->rx_offload_capa = internals->rx_offload_capa;
1644         dev_info->tx_offload_capa = internals->tx_offload_capa;
1645         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1646
1647         dev_info->reta_size = internals->reta_size;
1648 }
1649
1650 static int
1651 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1652                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1653                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1654 {
1655         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1656                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1657                                         0, dev->data->numa_node);
1658         if (bd_rx_q == NULL)
1659                 return -1;
1660
1661         bd_rx_q->queue_id = rx_queue_id;
1662         bd_rx_q->dev_private = dev->data->dev_private;
1663
1664         bd_rx_q->nb_rx_desc = nb_rx_desc;
1665
1666         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1667         bd_rx_q->mb_pool = mb_pool;
1668
1669         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1670
1671         return 0;
1672 }
1673
1674 static int
1675 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1676                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1677                 const struct rte_eth_txconf *tx_conf)
1678 {
1679         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1680                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1681                                         0, dev->data->numa_node);
1682
1683         if (bd_tx_q == NULL)
1684                 return -1;
1685
1686         bd_tx_q->queue_id = tx_queue_id;
1687         bd_tx_q->dev_private = dev->data->dev_private;
1688
1689         bd_tx_q->nb_tx_desc = nb_tx_desc;
1690         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1691
1692         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1693
1694         return 0;
1695 }
1696
1697 static void
1698 bond_ethdev_rx_queue_release(void *queue)
1699 {
1700         if (queue == NULL)
1701                 return;
1702
1703         rte_free(queue);
1704 }
1705
1706 static void
1707 bond_ethdev_tx_queue_release(void *queue)
1708 {
1709         if (queue == NULL)
1710                 return;
1711
1712         rte_free(queue);
1713 }
1714
1715 static void
1716 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1717 {
1718         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1719         struct bond_dev_private *internals;
1720
1721         /* Default value for polling slave found is true as we don't want to
1722          * disable the polling thread if we cannot get the lock */
1723         int i, polling_slave_found = 1;
1724
1725         if (cb_arg == NULL)
1726                 return;
1727
1728         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1729         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1730
1731         if (!bonded_ethdev->data->dev_started ||
1732                 !internals->link_status_polling_enabled)
1733                 return;
1734
1735         /* If device is currently being configured then don't check the slaves'
1736          * link status; wait until the next period */
1737         if (rte_spinlock_trylock(&internals->lock)) {
1738                 if (internals->slave_count > 0)
1739                         polling_slave_found = 0;
1740
1741                 for (i = 0; i < internals->slave_count; i++) {
1742                         if (!internals->slaves[i].link_status_poll_enabled)
1743                                 continue;
1744
1745                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1746                         polling_slave_found = 1;
1747
1748                         /* Update slave link status */
1749                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1750                                         internals->slaves[i].link_status_wait_to_complete);
1751
1752                         /* if link status has changed since last checked then call lsc
1753                          * event callback */
1754                         if (slave_ethdev->data->dev_link.link_status !=
1755                                         internals->slaves[i].last_link_status) {
1756                                 internals->slaves[i].last_link_status =
1757                                                 slave_ethdev->data->dev_link.link_status;
1758
1759                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1760                                                 RTE_ETH_EVENT_INTR_LSC,
1761                                                 &bonded_ethdev->data->port_id);
1762                         }
1763                 }
1764                 rte_spinlock_unlock(&internals->lock);
1765         }
1766
1767         if (polling_slave_found)
1768                 /* Set alarm to continue monitoring link status of slave ethdevs */
1769                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1770                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1771 }
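
/*
 * The monitor above uses the EAL alarm API as a self-rearming periodic
 * timer: the callback does its work, then re-registers itself. The same
 * pattern in isolation, assuming a 100ms period and a hypothetical
 * do_work() helper:
 *
 *	static void
 *	periodic_cb(void *arg)
 *	{
 *		do_work(arg);
 *		rte_eal_alarm_set(100 * 1000, periodic_cb, arg);
 *	}
 *
 * A single rte_eal_alarm_set(100 * 1000, periodic_cb, arg); arms it once,
 * after which it keeps itself alive until it stops re-arming.
 */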
1772
1773 static int
1774 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1775                 int wait_to_complete)
1776 {
1777         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1778
1779         if (!bonded_eth_dev->data->dev_started ||
1780                 internals->active_slave_count == 0) {
1781                 bonded_eth_dev->data->dev_link.link_status = 0;
1782                 return 0;
1783         } else {
1784                 struct rte_eth_dev *slave_eth_dev;
1785                 int i, link_up = 0;
1786
1787                 for (i = 0; i < internals->active_slave_count; i++) {
1788                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1789
1790                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1791                                         wait_to_complete);
1792                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1793                                 link_up = 1;
1794                                 break;
1795                         }
1796                 }
1797
1798                 bonded_eth_dev->data->dev_link.link_status = link_up;
1799         }
1800
1801         return 0;
1802 }
1803
1804 static void
1805 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1806 {
1807         struct bond_dev_private *internals = dev->data->dev_private;
1808         struct rte_eth_stats slave_stats;
1809         int i, j;
1810
1811         for (i = 0; i < internals->slave_count; i++) {
1812                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1813
1814                 stats->ipackets += slave_stats.ipackets;
1815                 stats->opackets += slave_stats.opackets;
1816                 stats->ibytes += slave_stats.ibytes;
1817                 stats->obytes += slave_stats.obytes;
1818                 stats->ierrors += slave_stats.ierrors;
1819                 stats->oerrors += slave_stats.oerrors;
1820                 stats->imcasts += slave_stats.imcasts;
1821                 stats->rx_nombuf += slave_stats.rx_nombuf;
1822                 stats->fdirmatch += slave_stats.fdirmatch;
1823                 stats->fdirmiss += slave_stats.fdirmiss;
1824                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1825                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1826                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1827                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1828
1829                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1830                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1831                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1832                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1833                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1834                         stats->q_errors[j] += slave_stats.q_errors[j];
1835                 }
1836
1837         }
1838 }
1839
1840 static void
1841 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1842 {
1843         struct bond_dev_private *internals = dev->data->dev_private;
1844         int i;
1845
1846         for (i = 0; i < internals->slave_count; i++)
1847                 rte_eth_stats_reset(internals->slaves[i].port_id);
1848 }
1849
1850 static void
1851 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1852 {
1853         struct bond_dev_private *internals = eth_dev->data->dev_private;
1854         int i;
1855
1856         internals->promiscuous_en = 1;
1857
1858         switch (internals->mode) {
1859         /* Promiscuous mode is propagated to all slaves */
1860         case BONDING_MODE_ROUND_ROBIN:
1861         case BONDING_MODE_BALANCE:
1862         case BONDING_MODE_BROADCAST:
1863                 for (i = 0; i < internals->slave_count; i++)
1864                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1865                 break;
1866         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
1867         case BONDING_MODE_8023AD:
1868                 break;
1869         /* Promiscuous mode is propagated only to primary slave */
1870         case BONDING_MODE_ACTIVE_BACKUP:
1871         case BONDING_MODE_TLB:
1872         case BONDING_MODE_ALB:
1873         default:
1874                 rte_eth_promiscuous_enable(internals->current_primary_port);
1875         }
1876 }
1877
1878 static void
1879 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1880 {
1881         struct bond_dev_private *internals = dev->data->dev_private;
1882         int i;
1883
1884         internals->promiscuous_en = 0;
1885
1886         switch (internals->mode) {
1887         /* Promiscuous mode is propagated to all slaves */
1888         case BONDING_MODE_ROUND_ROBIN:
1889         case BONDING_MODE_BALANCE:
1890         case BONDING_MODE_BROADCAST:
1891                 for (i = 0; i < internals->slave_count; i++)
1892                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1893                 break;
1894         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
1895         case BONDING_MODE_8023AD:
1896                 break;
1897         /* Promiscuous mode is propagated only to primary slave */
1898         case BONDING_MODE_ACTIVE_BACKUP:
1899         case BONDING_MODE_TLB:
1900         case BONDING_MODE_ALB:
1901         default:
1902                 rte_eth_promiscuous_disable(internals->current_primary_port);
1903         }
1904 }
1905
1906 static void
1907 bond_ethdev_delayed_lsc_propagation(void *arg)
1908 {
1909         if (arg == NULL)
1910                 return;
1911
1912         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1913                         RTE_ETH_EVENT_INTR_LSC);
1914 }
1915
1916 void
1917 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1918                 void *param)
1919 {
1920         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1921         struct bond_dev_private *internals;
1922         struct rte_eth_link link;
1923
1924         int i, valid_slave = 0;
1925         uint8_t active_pos;
1926         uint8_t lsc_flag = 0;
1927
1928         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1929                 return;
1930
1931         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1932         slave_eth_dev = &rte_eth_devices[port_id];
1933
1934         if (check_for_bonded_ethdev(bonded_eth_dev))
1935                 return;
1936
1937         internals = bonded_eth_dev->data->dev_private;
1938
1939         /* If the device isn't started don't handle interrupts */
1940         if (!bonded_eth_dev->data->dev_started)
1941                 return;
1942
1943         /* verify that port_id is a valid slave of bonded port */
1944         for (i = 0; i < internals->slave_count; i++) {
1945                 if (internals->slaves[i].port_id == port_id) {
1946                         valid_slave = 1;
1947                         break;
1948                 }
1949         }
1950
1951         if (!valid_slave)
1952                 return;
1953
1954         /* Search for port in active port list */
1955         active_pos = find_slave_by_id(internals->active_slaves,
1956                         internals->active_slave_count, port_id);
1957
1958         rte_eth_link_get_nowait(port_id, &link);
1959         if (link.link_status) {
1960                 if (active_pos < internals->active_slave_count)
1961                         return;
1962
1963                 /* if no active slave ports then set this port to be primary port */
1964                 if (internals->active_slave_count < 1) {
1965                         /* If first active slave, then change link status */
1966                         bonded_eth_dev->data->dev_link.link_status = 1;
1967                         internals->current_primary_port = port_id;
1968                         lsc_flag = 1;
1969
1970                         mac_address_slaves_update(bonded_eth_dev);
1971
1972                         /* Inherit eth dev link properties from first active slave */
1973                         link_properties_set(bonded_eth_dev,
1974                                         &(slave_eth_dev->data->dev_link));
1975                 }
1976
1977                 activate_slave(bonded_eth_dev, port_id);
1978
1979                 /* If user has defined the primary port then default to using it */
1980                 if (internals->user_defined_primary_port &&
1981                                 internals->primary_port == port_id)
1982                         bond_ethdev_primary_set(internals, port_id);
1983         } else {
1984                 if (active_pos == internals->active_slave_count)
1985                         return;
1986
1987                 /* Remove from active slave list */
1988                 deactivate_slave(bonded_eth_dev, port_id);
1989
1990                 /* No active slaves, change link status to down and reset other
1991                  * link properties */
1992                 if (internals->active_slave_count < 1) {
1993                         lsc_flag = 1;
1994                         bonded_eth_dev->data->dev_link.link_status = 0;
1995
1996                         link_properties_reset(bonded_eth_dev);
1997                 }
1998
1999                 /* Update primary id, take first active slave from list or if none
2000                  * available set to -1 */
2001                 if (port_id == internals->current_primary_port) {
2002                         if (internals->active_slave_count > 0)
2003                                 bond_ethdev_primary_set(internals,
2004                                                 internals->active_slaves[0]);
2005                         else
2006                                 internals->current_primary_port = internals->primary_port;
2007                 }
2008         }
2009
2010         if (lsc_flag) {
2011                 /* Cancel any possible outstanding interrupts if delays are enabled */
2012                 if (internals->link_up_delay_ms > 0 ||
2013                         internals->link_down_delay_ms > 0)
2014                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2015                                         bonded_eth_dev);
2016
2017                 if (bonded_eth_dev->data->dev_link.link_status) {
2018                         if (internals->link_up_delay_ms > 0)
2019                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2020                                                 bond_ethdev_delayed_lsc_propagation,
2021                                                 (void *)bonded_eth_dev);
2022                         else
2023                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2024                                                 RTE_ETH_EVENT_INTR_LSC);
2025
2026                 } else {
2027                         if (internals->link_down_delay_ms > 0)
2028                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2029                                                 bond_ethdev_delayed_lsc_propagation,
2030                                                 (void *)bonded_eth_dev);
2031                         else
2032                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2033                                                 RTE_ETH_EVENT_INTR_LSC);
2034                 }
2035         }
2036 }
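
/*
 * Applications can observe the link state changes propagated above by
 * registering their own callback on the bonded port. A sketch, where
 * app_lsc_handler is a hypothetical handler with the rte_eth_dev_cb_fn
 * signature:
 *
 *	static void
 *	app_lsc_handler(uint8_t port_id, enum rte_eth_event_type type,
 *			void *param)
 *	{
 *		struct rte_eth_link link;
 *
 *		RTE_SET_USED(type);
 *		RTE_SET_USED(param);
 *		rte_eth_link_get_nowait(port_id, &link);
 *		printf("Port %u link is %s\n", port_id,
 *				link.link_status ? "up" : "down");
 *	}
 *
 *	rte_eth_dev_callback_register(bond_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			app_lsc_handler, NULL);
 */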
2037
2038 static int
2039 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2040                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2041 {
2042         unsigned i, j;
2043         int result = 0;
2044         int slave_reta_size;
2045         unsigned reta_count;
2046         struct bond_dev_private *internals = dev->data->dev_private;
2047
2048         if (reta_size != internals->reta_size)
2049                 return -EINVAL;
2050
2051          /* Copy RETA table */
2052         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2053
2054         for (i = 0; i < reta_count; i++) {
2055                 internals->reta_conf[i].mask = reta_conf[i].mask;
2056                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2057                         if ((reta_conf[i].mask >> j) & 0x01)
2058                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2059         }
2060
2061         /* Fill rest of array */
2062         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2063                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2064                                 sizeof(internals->reta_conf[0]) * reta_count);
2065
2066         /* Propagate RETA over slaves */
2067         for (i = 0; i < internals->slave_count; i++) {
2068                 slave_reta_size = internals->slaves[i].reta_size;
2069                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2070                                 &internals->reta_conf[0], slave_reta_size);
2071                 if (result < 0)
2072                         return result;
2073         }
2074
2075         return 0;
2076 }
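
/*
 * A sketch of building the reta_conf argument consumed above, assuming a
 * 128-entry table spread round-robin over two Rx queues:
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[128 / RTE_RETA_GROUP_SIZE];
 *	unsigned k;
 *
 *	memset(reta_conf, 0, sizeof(reta_conf));
 *	for (k = 0; k < 128; k++) {
 *		reta_conf[k / RTE_RETA_GROUP_SIZE].mask |=
 *				1ULL << (k % RTE_RETA_GROUP_SIZE);
 *		reta_conf[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *				k % 2;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta_conf, 128);
 */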
2077
2078 static int
2079 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2080                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2081 {
2082         int i, j;
2083         struct bond_dev_private *internals = dev->data->dev_private;
2084
2085         if (reta_size != internals->reta_size)
2086                 return -EINVAL;
2087
2088          /* Copy RETA table */
2089         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2090                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2091                         if ((reta_conf[i].mask >> j) & 0x01)
2092                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2093
2094         return 0;
2095 }
2096
2097 static int
2098 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2099                 struct rte_eth_rss_conf *rss_conf)
2100 {
2101         int i, result = 0;
2102         struct bond_dev_private *internals = dev->data->dev_private;
2103         struct rte_eth_rss_conf bond_rss_conf;
2104
2105         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2106
2107         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2108
2109         if (bond_rss_conf.rss_hf != 0)
2110                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2111
2112         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2113                         sizeof(internals->rss_key)) {
2114                 if (bond_rss_conf.rss_key_len == 0)
2115                         bond_rss_conf.rss_key_len = 40;
2116                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2117                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2118                                 internals->rss_key_len);
2119         }
2120
2121         for (i = 0; i < internals->slave_count; i++) {
2122                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2123                                 &bond_rss_conf);
2124                 if (result < 0)
2125                         return result;
2126         }
2127
2128         return 0;
2129 }
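
/*
 * Usage sketch (illustrative only): updating the RSS hash functions at
 * runtime from the application; the hash-type flags are assumptions.
 *
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = NULL,	// NULL keeps the current key
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
 *	};
 *
 *	if (rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf) < 0)
 *		RTE_LOG(ERR, USER1, "RSS hash update failed\n");
 */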
2130
2131 static int
2132 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2133                 struct rte_eth_rss_conf *rss_conf)
2134 {
2135         struct bond_dev_private *internals = dev->data->dev_private;
2136
2137         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2138         rss_conf->rss_key_len = internals->rss_key_len;
2139         if (rss_conf->rss_key)
2140                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2141
2142         return 0;
2143 }
2144
2145 struct eth_dev_ops default_dev_ops = {
2146                 .dev_start            = bond_ethdev_start,
2147                 .dev_stop             = bond_ethdev_stop,
2148                 .dev_close            = bond_ethdev_close,
2149                 .dev_configure        = bond_ethdev_configure,
2150                 .dev_infos_get        = bond_ethdev_info,
2151                 .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2152                 .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2153                 .rx_queue_release     = bond_ethdev_rx_queue_release,
2154                 .tx_queue_release     = bond_ethdev_tx_queue_release,
2155                 .link_update          = bond_ethdev_link_update,
2156                 .stats_get            = bond_ethdev_stats_get,
2157                 .stats_reset          = bond_ethdev_stats_reset,
2158                 .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2159                 .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2160                 .reta_update          = bond_ethdev_rss_reta_update,
2161                 .reta_query           = bond_ethdev_rss_reta_query,
2162                 .rss_hash_update      = bond_ethdev_rss_hash_update,
2163                 .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2164 };
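
/*
 * This ops table is how the generic ethdev layer dispatches into the bonding
 * PMD: e.g. rte_eth_dev_start(port_id) resolves to
 * rte_eth_devices[port_id].dev_ops->dev_start, i.e. bond_ethdev_start()
 * above, so the bonded port behaves like any other ethdev to applications.
 */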
2165
2166 static int
2167 bond_init(const char *name, const char *params)
2168 {
2169         struct bond_dev_private *internals;
2170         struct rte_kvargs *kvlist;
2171         uint8_t bonding_mode, socket_id;
2172         int  arg_count, port_id;
2173
2174         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2175
2176         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2177         if (kvlist == NULL)
2178                 return -1;
2179
2180         /* Parse link bonding mode */
2181         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2182                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2183                                 &bond_ethdev_parse_slave_mode_kvarg,
2184                                 &bonding_mode) != 0) {
2185                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2186                                         name);
2187                         goto parse_error;
2188                 }
2189         } else {
2190                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2191                                 "device %s\n", name);
2192                 goto parse_error;
2193         }
2194
2195         /* Parse socket id to create bonding device on */
2196         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2197         if (arg_count == 1) {
2198                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2199                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2200                                 != 0) {
2201                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2202                                         "bonded device %s\n", name);
2203                         goto parse_error;
2204                 }
2205         } else if (arg_count > 1) {
2206                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2207                                 "bonded device %s\n", name);
2208                 goto parse_error;
2209         } else {
2210                 socket_id = rte_socket_id();
2211         }
2212
2213         /* Create link bonding eth device */
2214         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2215         if (port_id < 0) {
2216                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2217                                 "socket %u.\n", name, bonding_mode, socket_id);
2218                 goto parse_error;
2219         }
2220         internals = rte_eth_devices[port_id].data->dev_private;
2221         internals->kvlist = kvlist;
2222
2223         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2224                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2225         return 0;
2226
2227 parse_error:
2228         rte_kvargs_free(kvlist);
2229
2230         return -1;
2231 }
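
/*
 * Usage sketch (illustrative only): bond_init() runs for devices declared on
 * the EAL command line. The kvargs parsed above correspond to a --vdev
 * string such as (application name and slave port ids are assumptions):
 *
 *	./app -c 0xf -n 4 \
 *		--vdev 'eth_bond0,mode=1,slave=0,slave=1,primary=0'
 *
 * which creates an active-backup (mode 1) bonded device over ports 0 and 1
 * with port 0 as the primary slave.
 */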
2232
2233 static int
2234 bond_uninit(const char *name)
2235 {
2236         int  ret;
2237
2238         if (name == NULL)
2239                 return -EINVAL;
2240
2241         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2242
2243         /* free link bonding eth device */
2244         ret = rte_eth_bond_free(name);
2245         if (ret < 0)
2246                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2247
2248         return ret;
2249 }
2250
2251 /* This part resolves the slave port ids after all the other pdevs and vdevs
2252  * have been allocated */
2253 static int
2254 bond_ethdev_configure(struct rte_eth_dev *dev)
2255 {
2256         char *name = dev->data->name;
2257         struct bond_dev_private *internals = dev->data->dev_private;
2258         struct rte_kvargs *kvlist = internals->kvlist;
2259         int arg_count;
2260         uint8_t port_id = dev - rte_eth_devices;
2261
2262         static const uint8_t default_rss_key[40] = {
2263                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2264                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2265                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2266                 0xBE, 0xAC, 0x01, 0xFA
2267         };
2268
2269         unsigned i, j;
2270
2271         /* If RSS is enabled, fill table and key with default values */
2272         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2273                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2274                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2275                 memcpy(internals->rss_key, default_rss_key, 40);
2276
2277                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2278                         internals->reta_conf[i].mask = ~0LL;
2279                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2280                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2281                 }
2282         }
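		/*
		 * Example of the resulting default table with nb_rx_queues == 4:
		 * reta[] = { 0, 1, 2, 3, 0, 1, 2, 3, ... }, i.e. flows are
		 * spread round-robin across all configured Rx queues.
		 */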
2283
2284         /*
2285          * if no kvlist, it means that this bonded device has been created
2286          * through the bonding api.
2287          */
2288         if (!kvlist)
2289                 return 0;
2290
2291         /* Parse MAC address for bonded device */
2292         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2293         if (arg_count == 1) {
2294                 struct ether_addr bond_mac;
2295
2296                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2297                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2298                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2299                                         name);
2300                         return -1;
2301                 }
2302
2303                 /* Set MAC address */
2304                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2305                         RTE_LOG(ERR, EAL,
2306                                         "Failed to set mac address on bonded device %s\n",
2307                                         name);
2308                         return -1;
2309                 }
2310         } else if (arg_count > 1) {
2311                 RTE_LOG(ERR, EAL,
2312                                 "MAC address can be specified only once for bonded device %s\n",
2313                                 name);
2314                 return -1;
2315         }
2316
2317         /* Parse/set balance mode transmit policy */
2318         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2319         if (arg_count == 1) {
2320                 uint8_t xmit_policy;
2321
2322                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2323                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2324                                                 0) {
2325                         RTE_LOG(INFO, EAL,
2326                                         "Invalid xmit policy specified for bonded device %s\n",
2327                                         name);
2328                         return -1;
2329                 }
2330
2331                 /* Set balance mode transmit policy */
2332                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2333                         RTE_LOG(ERR, EAL,
2334                                         "Failed to set balance xmit policy on bonded device %s\n",
2335                                         name);
2336                         return -1;
2337                 }
2338         } else if (arg_count > 1) {
2339                 RTE_LOG(ERR, EAL,
2340                                 "Transmit policy can be specified only once for bonded device"
2341                                 " %s\n", name);
2342                 return -1;
2343         }
2344
2345         /* Parse/add slave ports to bonded device */
2346         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2347                 struct bond_ethdev_slave_ports slave_ports;
2348                 unsigned i;
2349
2350                 memset(&slave_ports, 0, sizeof(slave_ports));
2351
2352                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2353                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2354                         RTE_LOG(ERR, EAL,
2355                                         "Failed to parse slave ports for bonded device %s\n",
2356                                         name);
2357                         return -1;
2358                 }
2359
2360                 for (i = 0; i < slave_ports.slave_count; i++) {
2361                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2362                                 RTE_LOG(ERR, EAL,
2363                                                 "Failed to add port %d as slave to bonded device %s\n",
2364                                                 slave_ports.slaves[i], name);
2365                         }
2366                 }
2367
2368         } else {
2369                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2370                 return -1;
2371         }
2372
2373         /* Parse/set primary slave port id*/
2374         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2375         if (arg_count == 1) {
2376                 uint8_t primary_slave_port_id;
2377
2378                 if (rte_kvargs_process(kvlist,
2379                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2380                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2381                                 &primary_slave_port_id) < 0) {
2382                         RTE_LOG(INFO, EAL,
2383                                         "Invalid primary slave port id specified for bonded device"
2384                                         " %s\n", name);
2385                         return -1;
2386                 }
2387
2388                 /* Set primary slave port id */
2389                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2390                                 != 0) {
2391                         RTE_LOG(ERR, EAL,
2392                                         "Failed to set primary slave port %d on bonded device %s\n",
2393                                         primary_slave_port_id, name);
2394                         return -1;
2395                 }
2396         } else if (arg_count > 1) {
2397                 RTE_LOG(INFO, EAL,
2398                                 "Primary slave can be specified only once for bonded device"
2399                                 " %s\n", name);
2400                 return -1;
2401         }
2402
2403         /* Parse link status monitor polling interval */
2404         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2405         if (arg_count == 1) {
2406                 uint32_t lsc_poll_interval_ms;
2407
2408                 if (rte_kvargs_process(kvlist,
2409                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2410                                 &bond_ethdev_parse_time_ms_kvarg,
2411                                 &lsc_poll_interval_ms) < 0) {
2412                         RTE_LOG(INFO, EAL,
2413                                         "Invalid lsc polling interval value specified for bonded"
2414                                         " device %s\n", name);
2415                         return -1;
2416                 }
2417
2418                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2419                                 != 0) {
2420                         RTE_LOG(ERR, EAL,
2421                                         "Failed to set lsc monitor polling interval (%u ms) on"
2422                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2423                         return -1;
2424                 }
2425         } else if (arg_count > 1) {
2426                 RTE_LOG(INFO, EAL,
2427                                 "LSC polling interval can be specified only once for bonded"
2428                                 " device %s\n", name);
2429                 return -1;
2430         }
2431
2432         /* Parse link up interrupt propagation delay */
2433         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2434         if (arg_count == 1) {
2435                 uint32_t link_up_delay_ms;
2436
2437                 if (rte_kvargs_process(kvlist,
2438                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2439                                 &bond_ethdev_parse_time_ms_kvarg,
2440                                 &link_up_delay_ms) < 0) {
2441                         RTE_LOG(INFO, EAL,
2442                                         "Invalid link up propagation delay value specified for"
2443                                         " bonded device %s\n", name);
2444                         return -1;
2445                 }
2446
2447                 /* Set link up interrupt propagation delay */
2448                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2449                                 != 0) {
2450                         RTE_LOG(ERR, EAL,
2451                                         "Failed to set link up propagation delay (%u ms) on bonded"
2452                                         " device %s\n", link_up_delay_ms, name);
2453                         return -1;
2454                 }
2455         } else if (arg_count > 1) {
2456                 RTE_LOG(INFO, EAL,
2457                                 "Link up propagation delay can be specified only once for"
2458                                 " bonded device %s\n", name);
2459                 return -1;
2460         }
2461
2462         /* Parse link down interrupt propagation delay */
2463         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2464         if (arg_count == 1) {
2465                 uint32_t link_down_delay_ms;
2466
2467                 if (rte_kvargs_process(kvlist,
2468                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2469                                 &bond_ethdev_parse_time_ms_kvarg,
2470                                 &link_down_delay_ms) < 0) {
2471                         RTE_LOG(INFO, EAL,
2472                                         "Invalid link down propagation delay value specified for"
2473                                         " bonded device %s\n", name);
2474                         return -1;
2475                 }
2476
2477                 /* Set link down interrupt propagation delay */
2478                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2479                                 != 0) {
2480                         RTE_LOG(ERR, EAL,
2481                                         "Failed to set link down propagation delay (%u ms) on"
2482                                         " bonded device %s\n", link_down_delay_ms, name);
2483                         return -1;
2484                 }
2485         } else if (arg_count > 1) {
2486                 RTE_LOG(INFO, EAL,
2487                                 "Link down propagation delay can be specified only once for"
2488                                 " bonded device %s\n", name);
2489                 return -1;
2490         }
2491
2492         return 0;
2493 }
2494
2495 static struct rte_driver bond_drv = {
2496         .name = "eth_bond",
2497         .type = PMD_VDEV,
2498         .init = bond_init,
2499         .uninit = bond_uninit,
2500 };
2501
2502 PMD_REGISTER_DRIVER(bond_drv);