bonding: support RSS dynamic configuration
[dpdk.git] drivers/net/bonding/rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 #include <rte_ip.h>
42 #include <rte_devargs.h>
43 #include <rte_kvargs.h>
44 #include <rte_dev.h>
45 #include <rte_alarm.h>
46 #include <rte_cycles.h>
47
48 #include "rte_eth_bond.h"
49 #include "rte_eth_bond_private.h"
50 #include "rte_eth_bond_8023ad_private.h"
51
52 #define REORDER_PERIOD_MS 10
53
54 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
55
56 /* Table for statistics in mode 5 TLB */
57 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
58
59 static inline size_t
60 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
61 {
62         size_t vlan_offset = 0;
63
64         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
65                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
66
67                 vlan_offset = sizeof(struct vlan_hdr);
68                 *proto = vlan_hdr->eth_proto;
69
70                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
71                         vlan_hdr = vlan_hdr + 1;
72                         *proto = vlan_hdr->eth_proto;
73                         vlan_offset += sizeof(struct vlan_hdr);
74                 }
75         }
76         return vlan_offset;
77 }
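
/*
 * Usage sketch for get_vlan_offset(), mirroring the rx/tx paths below:
 *
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t off = get_vlan_offset(eth_hdr, &proto);
 *	struct ipv4_hdr *ip =
 *		(struct ipv4_hdr *)((char *)(eth_hdr + 1) + off);
 *
 * On return, *proto holds the inner EtherType (still big endian), so it
 * can be compared against rte_cpu_to_be_16(ETHER_TYPE_IPv4) and friends.
 * At most two stacked VLAN tags (QinQ) are skipped.
 */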
78
79 static uint16_t
80 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
81 {
82         struct bond_dev_private *internals;
83
84         uint16_t num_rx_slave = 0;
85         uint16_t num_rx_total = 0;
86
87         int i;
88
89         /* Cast to the structure containing the bonded device's port id and queue id */
90         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
91
92         internals = bd_rx_q->dev_private;
93
94
95         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
96                 /* The offset into bufs advances as packets are received
97                  * from successive slaves */
98                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
99                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
100                 if (num_rx_slave) {
101                         num_rx_total += num_rx_slave;
102                         nb_pkts -= num_rx_slave;
103                 }
104         }
105
106         return num_rx_total;
107 }
108
109 static uint16_t
110 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
111                 uint16_t nb_pkts)
112 {
113         struct bond_dev_private *internals;
114
115         /* Cast to the structure containing the bonded device's port id and queue id */
116         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
117
118         internals = bd_rx_q->dev_private;
119
120         return rte_eth_rx_burst(internals->current_primary_port,
121                         bd_rx_q->queue_id, bufs, nb_pkts);
122 }
123
124 static uint16_t
125 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
126                 uint16_t nb_pkts)
127 {
128         /* Cast to the structure containing the bonded device's port id and queue id */
129         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
130         struct bond_dev_private *internals = bd_rx_q->dev_private;
131         struct ether_addr bond_mac;
132
133         struct ether_hdr *hdr;
134
135         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
136         uint16_t num_rx_total = 0;      /* Total number of received packets */
137         uint8_t slaves[RTE_MAX_ETHPORTS];
138         uint8_t slave_count;
139
140         uint8_t collecting;  /* current slave collecting status */
141         const uint8_t promisc = internals->promiscuous_en;
142         uint8_t i, j, k;
143
144         rte_eth_macaddr_get(internals->port_id, &bond_mac);
145         /* Copy slave list to protect against slave up/down changes during rx
146          * bursting */
147         slave_count = internals->active_slave_count;
148         memcpy(slaves, internals->active_slaves,
149                         sizeof(internals->active_slaves[0]) * slave_count);
150
151         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
152                 j = num_rx_total;
153                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
154
155                 /* Read packets from this slave */
156                 num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
157                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
158
159                 for (k = j; k < 2 && k < num_rx_total; k++)
160                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
161
162                 /* Handle slow protocol packets. */
163                 while (j < num_rx_total) {
164                         if (j + 3 < num_rx_total)
165                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
166
167                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
168                         /* Remove the packet from the array if it is a slow packet, the
169                          * slave is not in collecting state, or the bonding interface is
170                          * not in promiscuous mode and the destination MAC does not match. */
171                         if (unlikely(hdr->ether_type == ether_type_slow_be ||
172                                 !collecting || (!promisc &&
173                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
174
175                                 if (hdr->ether_type == ether_type_slow_be) {
176                                         bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
177                                                 bufs[j]);
178                                 } else
179                                         rte_pktmbuf_free(bufs[j]);
180
181                                 /* Packet is managed by mode 4 or dropped, shift the array */
182                                 num_rx_total--;
183                                 if (j < num_rx_total) {
184                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
185                                                 (num_rx_total - j));
186                                 }
187                         } else
188                                 j++;
189                 }
190         }
191
192         return num_rx_total;
193 }
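
/*
 * Compaction example for the loop above: with bufs = { p0, p1, p2 } and
 * num_rx_total = 3, if p1 is a slow (LACP) packet it is handed to
 * bond_mode_8023ad_handle_slow_pkt(), num_rx_total drops to 2, and the
 * memmove() shifts p2 into slot 1, so the application only ever sees
 * { p0, p2 }.
 */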
194
195 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
196 uint32_t burstnumberRX;
197 uint32_t burstnumberTX;
198
199 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
200
201 static void
202 arp_op_name(uint16_t arp_op, char *buf)
203 {
204         switch (arp_op) {
205         case ARP_OP_REQUEST:
206                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
207                 return;
208         case ARP_OP_REPLY:
209                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
210                 return;
211         case ARP_OP_REVREQUEST:
212                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
213                                 "Reverse ARP Request");
214                 return;
215         case ARP_OP_REVREPLY:
216                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
217                                 "Reverse ARP Reply");
218                 return;
219         case ARP_OP_INVREQUEST:
220                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
221                                 "Peer Identify Request");
222                 return;
223         case ARP_OP_INVREPLY:
224                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
225                                 "Peer Identify Reply");
226                 return;
227         default:
228                 break;
229         }
230         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
231         return;
232 }
233 #endif
234 #define MaxIPv4String   16
235 static void
236 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
237 {
238         uint32_t ipv4_addr;
239
240         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
241         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
242                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
243                 ipv4_addr & 0xFF);
244 }
245
246 #define MAX_CLIENTS_NUMBER      128
247 uint8_t active_clients;
248 struct client_stats_t {
249         uint8_t port;
250         uint32_t ipv4_addr;
251         uint32_t ipv4_rx_packets;
252         uint32_t ipv4_tx_packets;
253 };
254 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
255
256 static void
257 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
258 {
259         int i = 0;
260
261         for (; i < MAX_CLIENTS_NUMBER; i++)     {
262                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
263                         /* Known client: update its RX or TX packet count */
264                         if (TXorRXindicator == &burstnumberRX)
265                                 client_stats[i].ipv4_rx_packets++;
266                         else
267                                 client_stats[i].ipv4_tx_packets++;
268                         return;
269                 }
270         }
271         /* New client: insert it into the table and count the packet */
272         if (active_clients == MAX_CLIENTS_NUMBER) return; /* table full */
273         if (TXorRXindicator == &burstnumberRX)
274                 client_stats[active_clients].ipv4_rx_packets++;
275         else
276                 client_stats[active_clients].ipv4_tx_packets++;
277         client_stats[active_clients].ipv4_addr = addr;
278         client_stats[active_clients].port = port;
279         active_clients++;
280 }
281
282 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
283 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
284                 RTE_LOG(DEBUG, PMD, \
285                 "%s " \
286                 "port:%d " \
287                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
288                 "SrcIP:%s " \
289                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
290                 "DstIP:%s " \
291                 "%s " \
292                 "%d\n", \
293                 info, \
294                 port, \
295                 eth_h->s_addr.addr_bytes[0], \
296                 eth_h->s_addr.addr_bytes[1], \
297                 eth_h->s_addr.addr_bytes[2], \
298                 eth_h->s_addr.addr_bytes[3], \
299                 eth_h->s_addr.addr_bytes[4], \
300                 eth_h->s_addr.addr_bytes[5], \
301                 src_ip, \
302                 eth_h->d_addr.addr_bytes[0], \
303                 eth_h->d_addr.addr_bytes[1], \
304                 eth_h->d_addr.addr_bytes[2], \
305                 eth_h->d_addr.addr_bytes[3], \
306                 eth_h->d_addr.addr_bytes[4], \
307                 eth_h->d_addr.addr_bytes[5], \
308                 dst_ip, \
309                 arp_op, \
310                 ++burstnumber)
311 #endif
312
313 static void
314 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
315                 uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
316 {
317         struct ipv4_hdr *ipv4_h;
318 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
319         struct arp_hdr *arp_h;
320         char dst_ip[16];
321         char ArpOp[24];
322         char buf[16];
323 #endif
324         char src_ip[16];
325
326         uint16_t ether_type = eth_h->ether_type;
327         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
328
329 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
330         snprintf(buf, 16, "%s", info);
331 #endif
332
333         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
334                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
335                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
336 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
337                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
338                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
339 #endif
340                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
341         }
342 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
343         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
344                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
345                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
346                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
347                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
348                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
349         }
350 #endif
351 }
352 #endif
353
354 static uint16_t
355 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
356 {
357         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
358         struct bond_dev_private *internals = bd_tx_q->dev_private;
359         struct ether_hdr *eth_h;
360         uint16_t ether_type, offset;
361         uint16_t nb_recv_pkts;
362         int i;
363
364         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
365
366         for (i = 0; i < nb_recv_pkts; i++) {
367                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
368                 ether_type = eth_h->ether_type;
369                 offset = get_vlan_offset(eth_h, &ether_type);
370
371                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
372 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
373                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
374 #endif
375                         bond_mode_alb_arp_recv(eth_h, offset, internals);
376                 }
377 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
378                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
379                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
380 #endif
381         }
382
383         return nb_recv_pkts;
384 }
385
386 static uint16_t
387 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
388                 uint16_t nb_pkts)
389 {
390         struct bond_dev_private *internals;
391         struct bond_tx_queue *bd_tx_q;
392
393         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
394         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
395
396         uint8_t num_of_slaves;
397         uint8_t slaves[RTE_MAX_ETHPORTS];
398
399         uint16_t num_tx_total = 0, num_tx_slave;
400
401         static int slave_idx = 0;       /* shared by all tx queues and bonded devices */
402         int i, cslave_idx = 0, tx_fail_total = 0;
403
404         bd_tx_q = (struct bond_tx_queue *)queue;
405         internals = bd_tx_q->dev_private;
406
407         /* Copy slave list to protect against slave up/down changes during tx
408          * bursting */
409         num_of_slaves = internals->active_slave_count;
410         memcpy(slaves, internals->active_slaves,
411                         sizeof(internals->active_slaves[0]) * num_of_slaves);
412
413         if (num_of_slaves < 1)
414                 return num_tx_total;
415
416         /* Populate each slave's mbuf array with the packets to be sent on it */
417         for (i = 0; i < nb_pkts; i++) {
418                 cslave_idx = (slave_idx + i) % num_of_slaves;
419                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
420         }
421
422         /* increment current slave index so the next call to tx burst starts on the
423          * next slave */
424         slave_idx = ++cslave_idx;
425
426         /* Send packet burst on each slave device */
427         for (i = 0; i < num_of_slaves; i++) {
428                 if (slave_nb_pkts[i] > 0) {
429                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
430                                         slave_bufs[i], slave_nb_pkts[i]);
431
432                         /* If the tx burst was partial, move unsent packets to the end of bufs */
433                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
434                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
435
436                                 tx_fail_total += tx_fail_slave;
437
438                                 memcpy(&bufs[nb_pkts - tx_fail_total],
439                                                 &slave_bufs[i][num_tx_slave],
440                                                 tx_fail_slave * sizeof(bufs[0]));
441                         }
442                         num_tx_total += num_tx_slave;
443                 }
444         }
445
446         return num_tx_total;
447 }
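
/*
 * Caller-side sketch (illustrative; bond_port_id, pkts and n are
 * assumptions): like plain rte_eth_tx_burst(), the tx handlers here return
 * the number of packets accepted, and several of them (round robin,
 * balance, 802.3ad) move unsent mbufs to the tail of bufs, so a caller may
 * retry or free the leftovers:
 *
 *	uint16_t sent = rte_eth_tx_burst(bond_port_id, 0, pkts, n);
 *	while (sent < n)
 *		rte_pktmbuf_free(pkts[sent++]);
 */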
448
449 static uint16_t
450 bond_ethdev_tx_burst_active_backup(void *queue,
451                 struct rte_mbuf **bufs, uint16_t nb_pkts)
452 {
453         struct bond_dev_private *internals;
454         struct bond_tx_queue *bd_tx_q;
455
456         bd_tx_q = (struct bond_tx_queue *)queue;
457         internals = bd_tx_q->dev_private;
458
459         if (internals->active_slave_count < 1)
460                 return 0;
461
462         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
463                         bufs, nb_pkts);
464 }
465
466 static inline uint16_t
467 ether_hash(struct ether_hdr *eth_hdr)
468 {
469         unaligned_uint16_t *word_src_addr =
470                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
471         unaligned_uint16_t *word_dst_addr =
472                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
473
474         return (word_src_addr[0] ^ word_dst_addr[0]) ^
475                         (word_src_addr[1] ^ word_dst_addr[1]) ^
476                         (word_src_addr[2] ^ word_dst_addr[2]);
477 }
478
479 static inline uint32_t
480 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
481 {
482         return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
483 }
484
485 static inline uint32_t
486 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
487 {
488         unaligned_uint32_t *word_src_addr =
489                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
490         unaligned_uint32_t *word_dst_addr =
491                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
492
493         return (word_src_addr[0] ^ word_dst_addr[0]) ^
494                         (word_src_addr[1] ^ word_dst_addr[1]) ^
495                         (word_src_addr[2] ^ word_dst_addr[2]) ^
496                         (word_src_addr[3] ^ word_dst_addr[3]);
497 }
498
499 uint16_t
500 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
501 {
502         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
503
504         uint32_t hash = ether_hash(eth_hdr);
505
506         return (hash ^= hash >> 8) % slave_count;
507 }
508
509 uint16_t
510 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
511 {
512         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
513         uint16_t proto = eth_hdr->ether_type;
514         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
515         uint32_t hash, l3hash = 0;
516
517         hash = ether_hash(eth_hdr);
518
519         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
520                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
521                                 ((char *)(eth_hdr + 1) + vlan_offset);
522                 l3hash = ipv4_hash(ipv4_hdr);
523
524         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
525                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
526                                 ((char *)(eth_hdr + 1) + vlan_offset);
527                 l3hash = ipv6_hash(ipv6_hdr);
528         }
529
530         hash = hash ^ l3hash;
531         hash ^= hash >> 16;
532         hash ^= hash >> 8;
533
534         return hash % slave_count;
535 }
536
537 uint16_t
538 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
539 {
540         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
541         uint16_t proto = eth_hdr->ether_type;
542         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
543
544         struct udp_hdr *udp_hdr = NULL;
545         struct tcp_hdr *tcp_hdr = NULL;
546         uint32_t hash, l3hash = 0, l4hash = 0;
547
548         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
549                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
550                                 ((char *)(eth_hdr + 1) + vlan_offset);
551                 size_t ip_hdr_offset;
552
553                 l3hash = ipv4_hash(ipv4_hdr);
554
555                 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
556                                 IPV4_IHL_MULTIPLIER;
557
558                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
559                         tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
560                                         ip_hdr_offset);
561                         l4hash = HASH_L4_PORTS(tcp_hdr);
562                 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
563                         udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
564                                         ip_hdr_offset);
565                         l4hash = HASH_L4_PORTS(udp_hdr);
566                 }
567         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
568                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
569                                 ((char *)(eth_hdr + 1) + vlan_offset);
570                 l3hash = ipv6_hash(ipv6_hdr);
571
572                 if (ipv6_hdr->proto == IPPROTO_TCP) {
573                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
574                         l4hash = HASH_L4_PORTS(tcp_hdr);
575                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
576                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
577                         l4hash = HASH_L4_PORTS(udp_hdr);
578                 }
579         }
580
581         hash = l3hash ^ l4hash;
582         hash ^= hash >> 16;
583         hash ^= hash >> 8;
584
585         return hash % slave_count;
586 }
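
/*
 * The three helpers above implement BALANCE_XMIT_POLICY_LAYER2, _LAYER23
 * and _LAYER34; internals->xmit_hash points at whichever one is active.
 * A minimal application-side sketch, assuming bond_port_id refers to a
 * device in BONDING_MODE_BALANCE:
 *
 *	if (rte_eth_bond_xmit_policy_set(bond_port_id,
 *			BALANCE_XMIT_POLICY_LAYER34) != 0)
 *		rte_exit(EXIT_FAILURE, "Cannot set xmit policy\n");
 *
 * The l23/l34 variants fold the hash with ">> 16" and ">> 8" before the
 * modulo so that high-order entropy reaches the slave index.
 */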
587
588 struct bwg_slave {
589         uint64_t bwg_left_int;
590         uint64_t bwg_left_remainder;
591         uint8_t slave;
592 };
593
594 void
595 bond_tlb_activate_slave(struct bond_dev_private *internals) {
596         int i;
597
598         for (i = 0; i < internals->active_slave_count; i++) {
599                 tlb_last_obytets[internals->active_slaves[i]] = 0;
600         }
601 }
602
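/* qsort() comparator: orders bwg_slave entries by descending remaining
 * bandwidth, integer part first and remainder as tie-breaker. */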
603 static int
604 bandwidth_cmp(const void *a, const void *b)
605 {
606         const struct bwg_slave *bwg_a = a;
607         const struct bwg_slave *bwg_b = b;
608         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
609         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
610                         (int64_t)bwg_a->bwg_left_remainder;
611         if (diff > 0)
612                 return 1;
613         else if (diff < 0)
614                 return -1;
615         else if (diff2 > 0)
616                 return 1;
617         else if (diff2 < 0)
618                 return -1;
619         else
620                 return 0;
621 }
622
623 static void
624 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
625                 struct bwg_slave *bwg_slave)
626 {
627         struct rte_eth_link link_status;
628
629         rte_eth_link_get(port_id, &link_status);
630         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
631         if (link_bwg == 0)
632                 return;
633         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
634         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
635         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
636 }
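
/*
 * Worked example (sketch): for a 10G slave, link_bwg starts as
 * 10000 * 1000000 / 8 = 1.25e9 bytes/s. After the multiplication by
 * (update_idx + 1) * REORDER_PERIOD_MS, both link_bwg and 1000 * load are
 * expressed in 1/1000-byte units over the elapsed measurement window, so
 * the quotient/remainder pair ranks slaves by the share of the window's
 * capacity still unused (assuming load does not exceed that capacity,
 * since the fields are unsigned).
 */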
637
638 static void
639 bond_ethdev_update_tlb_slave_cb(void *arg)
640 {
641         struct bond_dev_private *internals = arg;
642         struct rte_eth_stats slave_stats;
643         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
644         uint8_t slave_count;
645         uint64_t tx_bytes;
646
647         uint8_t update_stats = 0;
648         uint8_t i, slave_id;
649
650         internals->slave_update_idx++;
651
652
653         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
654                 update_stats = 1;
655
656         for (i = 0; i < internals->active_slave_count; i++) {
657                 slave_id = internals->active_slaves[i];
658                 rte_eth_stats_get(slave_id, &slave_stats);
659                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
660                 bandwidth_left(slave_id, tx_bytes,
661                                 internals->slave_update_idx, &bwg_array[i]);
662                 bwg_array[i].slave = slave_id;
663
664                 if (update_stats) {
665                         tlb_last_obytets[slave_id] = slave_stats.obytes;
666                 }
667         }
668
669         if (update_stats == 1)
670                 internals->slave_update_idx = 0;
671
672         slave_count = i;
673         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
674         for (i = 0; i < slave_count; i++)
675                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
676
677         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
678                         (struct bond_dev_private *)internals);
679 }
680
681 static uint16_t
682 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
683 {
684         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
685         struct bond_dev_private *internals = bd_tx_q->dev_private;
686
687         struct rte_eth_dev *primary_port =
688                         &rte_eth_devices[internals->primary_port];
689         uint16_t num_tx_total = 0;
690         uint8_t i, j;
691
692         uint8_t num_of_slaves = internals->active_slave_count;
693         uint8_t slaves[RTE_MAX_ETHPORTS];
694
695         struct ether_hdr *ether_hdr;
696         struct ether_addr primary_slave_addr;
697         struct ether_addr active_slave_addr;
698
699         if (num_of_slaves < 1)
700                 return num_tx_total;
701
702         memcpy(slaves, internals->tlb_slaves_order,
703                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
704
705
706         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
707
708         if (nb_pkts > 3) {
709                 for (i = 0; i < 3; i++)
710                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
711         }
712
713         for (i = 0; i < num_of_slaves; i++) {
714                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
715                 for (j = num_tx_total; j < nb_pkts; j++) {
716                         if (j + 3 < nb_pkts)
717                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
718
719                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
720                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
721                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
722 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
723                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
724 #endif
725                 }
726
727                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
728                                 bufs + num_tx_total, nb_pkts - num_tx_total);
729
730                 if (num_tx_total == nb_pkts)
731                         break;
732         }
733
734         return num_tx_total;
735 }
736
737 void
738 bond_tlb_disable(struct bond_dev_private *internals)
739 {
740         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
741 }
742
743 void
744 bond_tlb_enable(struct bond_dev_private *internals)
745 {
746         bond_ethdev_update_tlb_slave_cb(internals);
747 }
748
749 static uint16_t
750 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
751 {
752         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
753         struct bond_dev_private *internals = bd_tx_q->dev_private;
754
755         struct ether_hdr *eth_h;
756         uint16_t ether_type, offset;
757
758         struct client_data *client_info;
759
760         /*
761          * We create transmit buffers for every slave, plus one extra for packets
762          * sent via the TLB policy. In the worst case every packet is sent on one port.
763          */
764         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
765         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
766
767         /*
768          * We create separate transmit buffers for the ARP update packets, since
769          * they are not counted in num_tx_total.
770          */
771         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
772         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
773
774         struct rte_mbuf *upd_pkt;
775         size_t pkt_size;
776
777         uint16_t num_send, num_not_send = 0;
778         uint16_t num_tx_total = 0;
779         uint8_t slave_idx;
780
781         int i, j;
782
783         /* Scan the tx burst for ARP packets and hand them to the ALB logic */
784         for (i = 0; i < nb_pkts; i++) {
785                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
786                 ether_type = eth_h->ether_type;
787                 offset = get_vlan_offset(eth_h, &ether_type);
788
789                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
790                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
791
792                         /* Change src mac in eth header */
793                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
794
795                         /* Add packet to slave tx buffer */
796                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
797                         slave_bufs_pkts[slave_idx]++;
798                 } else {
799                         /* If packet is not ARP, send it with TLB policy */
800                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
801                                         bufs[i];
802                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
803                 }
804         }
805
806         /* Update connected client ARP tables */
807         if (internals->mode6.ntt) {
808                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
809                         client_info = &internals->mode6.client_table[i];
810
811                         if (client_info->in_use) {
812                                 /* Allocate new packet to send ARP update on current slave */
813                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
814                                 if (upd_pkt == NULL) {
815                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
816                                         continue;
817                                 }
818                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
819                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
820                                 upd_pkt->data_len = pkt_size;
821                                 upd_pkt->pkt_len = pkt_size;
822
823                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
824                                                 internals);
825
826                                 /* Add packet to update tx buffer */
827                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
828                                 update_bufs_pkts[slave_idx]++;
829                         }
830                 }
831                 internals->mode6.ntt = 0;
832         }
833
834         /* Send ARP packets on proper slaves */
835         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
836                 if (slave_bufs_pkts[i] > 0) {
837                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
838                                         slave_bufs[i], slave_bufs_pkts[i]);
839                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
840                                 bufs[nb_pkts - 1 - num_not_send - j] =
841                                                 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
842                         }
843
844                         num_tx_total += num_send;
845                         num_not_send += slave_bufs_pkts[i] - num_send;
846
847 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
848                         /* Print TX stats including update packets */
849                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
850                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
851                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
852                         }
853 #endif
854                 }
855         }
856
857         /* Send update packets on proper slaves */
858         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
859                 if (update_bufs_pkts[i] > 0) {
860                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
861                                         update_bufs_pkts[i]);
862                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
863                                 rte_pktmbuf_free(update_bufs[i][j]);
864                         }
865 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
866                         for (j = 0; j < update_bufs_pkts[i]; j++) {
867                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
868                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
869                         }
870 #endif
871                 }
872         }
873
874         /* Send non-ARP packets using the TLB policy */
875         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
876                 num_send = bond_ethdev_tx_burst_tlb(queue,
877                                 slave_bufs[RTE_MAX_ETHPORTS],
878                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
879
880                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
881                         bufs[nb_pkts - 1 - num_not_send - j] =
882                                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
883                 }
884
885                 num_tx_total += num_send;
886                 num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
887         }
888
889         return num_tx_total;
890 }
891
892 static uint16_t
893 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
894                 uint16_t nb_pkts)
895 {
896         struct bond_dev_private *internals;
897         struct bond_tx_queue *bd_tx_q;
898
899         uint8_t num_of_slaves;
900         uint8_t slaves[RTE_MAX_ETHPORTS];
901
902         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
903
904         int i, op_slave_id;
905
906         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
907         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
908
909         bd_tx_q = (struct bond_tx_queue *)queue;
910         internals = bd_tx_q->dev_private;
911
912         /* Copy slave list to protect against slave up/down changes during tx
913          * bursting */
914         num_of_slaves = internals->active_slave_count;
915         memcpy(slaves, internals->active_slaves,
916                         sizeof(internals->active_slaves[0]) * num_of_slaves);
917
918         if (num_of_slaves < 1)
919                 return num_tx_total;
920
921         /* Populate each slave's mbuf array with the packets to be sent on it */
922         for (i = 0; i < nb_pkts; i++) {
923                 /* Select output slave using hash based on xmit policy */
924                 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
925
926                 /* Populate slave mbuf arrays with mbufs for that slave */
927                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
928         }
929
930         /* Send packet burst on each slave device */
931         for (i = 0; i < num_of_slaves; i++) {
932                 if (slave_nb_pkts[i] > 0) {
933                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
934                                         slave_bufs[i], slave_nb_pkts[i]);
935
936                         /* If the tx burst was partial, move unsent packets to the end of bufs */
937                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
938                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
939
940                                 tx_fail_total += slave_tx_fail_count;
941                                 memcpy(&bufs[nb_pkts - tx_fail_total],
942                                                 &slave_bufs[i][num_tx_slave],
943                                                 slave_tx_fail_count * sizeof(bufs[0]));
944                         }
945
946                         num_tx_total += num_tx_slave;
947                 }
948         }
949
950         return num_tx_total;
951 }
952
953 static uint16_t
954 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
955                 uint16_t nb_pkts)
956 {
957         struct bond_dev_private *internals;
958         struct bond_tx_queue *bd_tx_q;
959
960         uint8_t num_of_slaves;
961         uint8_t slaves[RTE_MAX_ETHPORTS];
962         /* positions in the slaves array, not port IDs */
963         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
964         uint8_t distributing_count;
965
966         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
967         uint16_t i, j, op_slave_idx;
968         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
969
970         /* Allocate extra slots for slow (LACP) packets in 802.3ad mode. */
971         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
972         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
973
974         /* Total number of packets in slave_bufs */
975         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
976         /* Number of slow packets placed in each slave's buffer */
977         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
978
979         bd_tx_q = (struct bond_tx_queue *)queue;
980         internals = bd_tx_q->dev_private;
981
982         /* Copy slave list to protect against slave up/down changes during tx
983          * bursting */
984         num_of_slaves = internals->active_slave_count;
985         if (num_of_slaves < 1)
986                 return num_tx_total;
987
988         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
989
990         distributing_count = 0;
991         for (i = 0; i < num_of_slaves; i++) {
992                 struct port *port = &mode_8023ad_ports[slaves[i]];
993
994                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
995                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
996                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
997
998                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
999                         slave_bufs[i][j] = slow_pkts[j];
1000
1001                 if (ACTOR_STATE(port, DISTRIBUTING))
1002                         distributing_offsets[distributing_count++] = i;
1003         }
1004
1005         if (likely(distributing_count > 0)) {
1006                 /* Populate each slave's mbuf array with the packets to be sent on it */
1007                 for (i = 0; i < nb_pkts; i++) {
1008                         /* Select output slave using hash based on xmit policy */
1009                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1010
1011                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
1012                          * slaves that are currently distributing. */
1013                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
1014                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1015                         slave_nb_pkts[slave_offset]++;
1016                 }
1017         }
1018
1019         /* Send packet burst on each slave device */
1020         for (i = 0; i < num_of_slaves; i++) {
1021                 if (slave_nb_pkts[i] == 0)
1022                         continue;
1023
1024                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1025                                 slave_bufs[i], slave_nb_pkts[i]);
1026
1027                 /* If the tx burst was partial, drop the unsent slow packets */
1028                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1029                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1030
1031                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1032                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1033
1034                 /* If the tx burst was partial, move unsent packets to the end of bufs */
1035                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1036                         uint16_t j = nb_pkts - num_tx_fail_total;
1037                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1038                                 bufs[j] = slave_bufs[i][num_tx_slave];
1039                 }
1040         }
1041
1042         return num_tx_total;
1043 }
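
/*
 * Accounting example for the loop above: if slave i dequeued 2 slow
 * packets from its tx_ring and was assigned 6 data packets
 * (slave_nb_pkts = 8), and the hardware accepted only 5, then the slow
 * packets occupy slots 0-1 and were sent, num_tx_total grows by
 * 5 - 2 = 3 data packets, and the 3 unsent data packets are moved to the
 * tail of bufs for the caller to retry.
 */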
1044
1045 static uint16_t
1046 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1047                 uint16_t nb_pkts)
1048 {
1049         struct bond_dev_private *internals;
1050         struct bond_tx_queue *bd_tx_q;
1051
1052         uint8_t tx_failed_flag = 0, num_of_slaves;
1053         uint8_t slaves[RTE_MAX_ETHPORTS];
1054
1055         uint16_t max_nb_of_tx_pkts = 0;
1056
1057         int slave_tx_total[RTE_MAX_ETHPORTS];
1058         int i, most_successful_tx_slave = -1;
1059
1060         bd_tx_q = (struct bond_tx_queue *)queue;
1061         internals = bd_tx_q->dev_private;
1062
1063         /* Copy slave list to protect against slave up/down changes during tx
1064          * bursting */
1065         num_of_slaves = internals->active_slave_count;
1066         memcpy(slaves, internals->active_slaves,
1067                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1068
1069         if (num_of_slaves < 1)
1070                 return 0;
1071
1072         /* Bump each mbuf's refcnt so every slave's transmit frees one reference */
1073         for (i = 0; i < nb_pkts; i++)
1074                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1075
1076         /* Transmit burst on each active slave */
1077         for (i = 0; i < num_of_slaves; i++) {
1078                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1079                                         bufs, nb_pkts);
1080
1081                 if (unlikely(slave_tx_total[i] < nb_pkts))
1082                         tx_failed_flag = 1;
1083
1084                 /* record the value and slave index for the slave which transmits the
1085                  * maximum number of packets */
1086                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1087                         max_nb_of_tx_pkts = slave_tx_total[i];
1088                         most_successful_tx_slave = i;
1089                 }
1090         }
1091
1092         /* if slaves fail to transmit packets from burst, the calling application
1093          * is not expected to know about multiple references to packets so we must
1094          * handle failures of all packets except those of the most successful slave
1095          */
1096         if (unlikely(tx_failed_flag))
1097                 for (i = 0; i < num_of_slaves; i++)
1098                         if (i != most_successful_tx_slave)
1099                                 while (slave_tx_total[i] < nb_pkts)
1100                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1101
1102         return max_nb_of_tx_pkts;
1103 }
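
/*
 * Reference-count sketch for broadcast mode: with three active slaves,
 * each mbuf enters with refcnt 1 and rte_mbuf_refcnt_update() raises it
 * to 3. Every successful slave transmit releases one reference, so a
 * packet sent on all three slaves is freed normally, while the
 * rte_pktmbuf_free() loop above drops the references held for slaves that
 * failed to send it.
 */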
1104
1105 void
1106 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1107                 struct rte_eth_link *slave_dev_link)
1108 {
1109         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1110         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1111
1112         if (slave_dev_link->link_status &&
1113                 bonded_eth_dev->data->dev_started) {
1114                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1115                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
1116
1117                 internals->link_props_set = 1;
1118         }
1119 }
1120
1121 void
1122 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1123 {
1124         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1125
1126         memset(&(bonded_eth_dev->data->dev_link), 0,
1127                         sizeof(bonded_eth_dev->data->dev_link));
1128
1129         internals->link_props_set = 0;
1130 }
1131
1132 int
1133 link_properties_valid(struct rte_eth_link *bonded_dev_link,
1134                 struct rte_eth_link *slave_dev_link)
1135 {
1136         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1137                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
1138                 return -1;
1139
1140         return 0;
1141 }
1142
1143 int
1144 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1145 {
1146         struct ether_addr *mac_addr;
1147
1148         if (eth_dev == NULL) {
1149                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1150                 return -1;
1151         }
1152
1153         if (dst_mac_addr == NULL) {
1154                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1155                 return -1;
1156         }
1157
1158         mac_addr = eth_dev->data->mac_addrs;
1159
1160         ether_addr_copy(mac_addr, dst_mac_addr);
1161         return 0;
1162 }
1163
1164 int
1165 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1166 {
1167         struct ether_addr *mac_addr;
1168
1169         if (eth_dev == NULL) {
1170                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1171                 return -1;
1172         }
1173
1174         if (new_mac_addr == NULL) {
1175                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1176                 return -1;
1177         }
1178
1179         mac_addr = eth_dev->data->mac_addrs;
1180
1181         /* If the new MAC differs from the current one, update it */
1182         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1183                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1184
1185         return 0;
1186 }
1187
1188 int
1189 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1190 {
1191         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1192         int i;
1193
1194         /* Update slave devices MAC addresses */
1195         if (internals->slave_count < 1)
1196                 return -1;
1197
1198         switch (internals->mode) {
1199         case BONDING_MODE_ROUND_ROBIN:
1200         case BONDING_MODE_BALANCE:
1201         case BONDING_MODE_BROADCAST:
1202                 for (i = 0; i < internals->slave_count; i++) {
1203                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1204                                         bonded_eth_dev->data->mac_addrs)) {
1205                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1206                                                 internals->slaves[i].port_id);
1207                                 return -1;
1208                         }
1209                 }
1210                 break;
1211         case BONDING_MODE_8023AD:
1212                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1213                 break;
1214         case BONDING_MODE_ACTIVE_BACKUP:
1215         case BONDING_MODE_TLB:
1216         case BONDING_MODE_ALB:
1217         default:
1218                 for (i = 0; i < internals->slave_count; i++) {
1219                         if (internals->slaves[i].port_id ==
1220                                         internals->current_primary_port) {
1221                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1222                                                 bonded_eth_dev->data->mac_addrs)) {
1223                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1224                                                         internals->current_primary_port);
1225                                         return -1;
1226                                 }
1227                         } else {
1228                                 if (mac_address_set(
1229                                                 &rte_eth_devices[internals->slaves[i].port_id],
1230                                                 &internals->slaves[i].persisted_mac_addr)) {
1231                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1232                                                         internals->slaves[i].port_id);
1233                                         return -1;
1234                                 }
1235                         }
1236                 }
1237         }
1238
1239         return 0;
1240 }
1241
1242 int
1243 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1244 {
1245         struct bond_dev_private *internals;
1246
1247         internals = eth_dev->data->dev_private;
1248
1249         switch (mode) {
1250         case BONDING_MODE_ROUND_ROBIN:
1251                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1252                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1253                 break;
1254         case BONDING_MODE_ACTIVE_BACKUP:
1255                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1256                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1257                 break;
1258         case BONDING_MODE_BALANCE:
1259                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1260                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1261                 break;
1262         case BONDING_MODE_BROADCAST:
1263                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1264                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1265                 break;
1266         case BONDING_MODE_8023AD:
1267                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1268                         return -1;
1269
1270                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1271                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1272                 RTE_LOG(WARNING, PMD,
1273                                 "Using mode 4, TX and RX bursts must be invoked "
1274                                 "at least once every 100ms.\n");
1275                 break;
1276         case BONDING_MODE_TLB:
1277                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1278                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1279                 break;
1280         case BONDING_MODE_ALB:
1281                 if (bond_mode_alb_enable(eth_dev) != 0)
1282                         return -1;
1283
1284                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1285                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1286                 break;
1287         default:
1288                 return -1;
1289         }
1290
1291         internals->mode = mode;
1292
1293         return 0;
1294 }
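
/*
 * Application-side sketch (illustrative; assumes EAL is initialised and
 * slave_port_id exists, and "bond0" is an arbitrary name): creating a
 * bonded device installs the burst handlers above via this function.
 *
 *	int bond_port = rte_eth_bond_create("bond0", BONDING_MODE_BALANCE,
 *			rte_socket_id());
 *	if (bond_port < 0)
 *		rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");
 *	if (rte_eth_bond_slave_add(bond_port, slave_port_id) != 0)
 *		rte_exit(EXIT_FAILURE, "Failed to add slave\n");
 *
 * rte_eth_bond_mode_set() may be used later to switch modes; it also ends
 * up in bond_ethdev_mode_set().
 */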
1295
1296 int
1297 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1298                 struct rte_eth_dev *slave_eth_dev)
1299 {
1300         struct bond_rx_queue *bd_rx_q;
1301         struct bond_tx_queue *bd_tx_q;
1302
1303         int errval;
1304         uint16_t q_id;
1305
1306         /* Stop slave */
1307         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1308
1309         /* Enable interrupts on slave device if supported */
1310         if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
1311                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1312
1313         /* If RSS is enabled on the bonded device, try to enable it on the slaves */
1314         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1315                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1316                                 != 0) {
1317                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1318                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1319                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1320                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1321                 } else {
1322                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1323                 }
1324
1325                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1326                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1327                 slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
1328         }
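
        /*
         * Example bonded-device configuration that exercises the branch
         * above (illustrative field values):
         *
         *	struct rte_eth_conf port_conf = {
         *		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
         *		.rx_adv_conf.rss_conf = {
         *			.rss_key = NULL,
         *			.rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
         *		},
         *	};
         *
         * With rss_key == NULL (rss_key_len == 0) key selection is left to
         * each slave PMD; otherwise the bonded device's key is pushed down
         * so that all slaves hash identically.
         */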
1329
1330         /* Configure device */
1331         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1332                         bonded_eth_dev->data->nb_rx_queues,
1333                         bonded_eth_dev->data->nb_tx_queues,
1334                         &(slave_eth_dev->data->dev_conf));
1335         if (errval != 0) {
1336                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1337                                 slave_eth_dev->data->port_id, errval);
1338                 return errval;
1339         }
1340
1341         /* Setup Rx Queues */
1342         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1343                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1344
1345                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1346                                 bd_rx_q->nb_rx_desc,
1347                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1348                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1349                 if (errval != 0) {
1350                         RTE_BOND_LOG(ERR,
1351                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1352                                         slave_eth_dev->data->port_id, q_id, errval);
1353                         return errval;
1354                 }
1355         }
1356
1357         /* Setup Tx Queues */
1358         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1359                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1360
1361                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1362                                 bd_tx_q->nb_tx_desc,
1363                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1364                                 &bd_tx_q->tx_conf);
1365                 if (errval != 0) {
1366                         RTE_BOND_LOG(ERR,
1367                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1368                                         slave_eth_dev->data->port_id, q_id, errval);
1369                         return errval;
1370                 }
1371         }
1372
1373         /* Start device */
1374         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1375         if (errval != 0) {
1376                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1377                                 slave_eth_dev->data->port_id, errval);
1378                 return -1;
1379         }
1380
1381         /* If RSS is enabled for bonding, synchronize RETA */
1382         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1383                 int i;
1384                 struct bond_dev_private *internals;
1385
1386                 internals = bonded_eth_dev->data->dev_private;
1387
1388                 for (i = 0; i < internals->slave_count; i++) {
1389                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1390                                 errval = rte_eth_dev_rss_reta_update(
1391                                                 slave_eth_dev->data->port_id,
1392                                                 &internals->reta_conf[0],
1393                                                 internals->slaves[i].reta_size);
1394                                 if (errval != 0) {
1395                                         RTE_LOG(WARNING, PMD,
1396                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1397                                                         " RSS configuration for bonding may be inconsistent.\n",
1398                                                         slave_eth_dev->data->port_id, errval);
1399                                 }
1400                                 break;
1401                         }
1402                 }
1403         }
1404
1405         /* If lsc interrupt is set, check initial slave's link status */
1406         if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
1407                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1408                                 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1409
1410         return 0;
1411 }
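/*
 * Note: slave_configure() gives every slave the bonded device's queue
 * counts and, when ETH_MQ_RX_RSS is set, its RSS key and hash fields, so
 * RSS is configured once on the bonded port. A sketch (hypothetical
 * application code; 'bond_port' and queue counts illustrative):
 *
 *   struct rte_eth_conf conf = {
 *       .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *       .rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP,
 *   };
 *   rte_eth_dev_configure(bond_port, 4, 4, &conf);
 */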
1412
1413 void
1414 slave_remove(struct bond_dev_private *internals,
1415                 struct rte_eth_dev *slave_eth_dev)
1416 {
1417         uint8_t i;
1418
1419         for (i = 0; i < internals->slave_count; i++)
1420                 if (internals->slaves[i].port_id ==
1421                                 slave_eth_dev->data->port_id)
1422                         break;
1423
1424         if (i < (internals->slave_count - 1))
1425                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1426                                 sizeof(internals->slaves[0]) *
1427                                 (internals->slave_count - i - 1));
1428
1429         internals->slave_count--;
1430 }
1431
1432 static void
1433 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1434
1435 void
1436 slave_add(struct bond_dev_private *internals,
1437                 struct rte_eth_dev *slave_eth_dev)
1438 {
1439         struct bond_slave_details *slave_details =
1440                         &internals->slaves[internals->slave_count];
1441
1442         slave_details->port_id = slave_eth_dev->data->port_id;
1443         slave_details->last_link_status = 0;
1444
1445         /* If slave device doesn't support interrupts then we need to enable
1446          * polling to monitor link status */
1447         if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
1448                 slave_details->link_status_poll_enabled = 1;
1449
1450                 if (!internals->link_status_polling_enabled) {
1451                         internals->link_status_polling_enabled = 1;
1452
1453                         rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1454                                         bond_ethdev_slave_link_status_change_monitor,
1455                                         (void *)&rte_eth_devices[internals->port_id]);
1456                 }
1457         }
1458
1459         slave_details->link_status_wait_to_complete = 0;
1460         /* Save the slave's original MAC address so it can be restored later */
1461         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1462                         sizeof(struct ether_addr));
1463 }
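/*
 * Slaves whose PMDs lack RTE_PCI_DRV_INTR_LSC fall back to the alarm-based
 * poll armed above. The poll interval can be tuned from application code
 * (sketch; 'bond_port' illustrative):
 *
 *   rte_eth_bond_link_monitoring_set(bond_port, 100);   // poll every 100 ms
 */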
1464
1465 void
1466 bond_ethdev_primary_set(struct bond_dev_private *internals,
1467                 uint8_t slave_port_id)
1468 {
1469         int i;
1470
1471         if (internals->active_slave_count < 1)
1472                 internals->current_primary_port = slave_port_id;
1473         else
1474                 /* Search bonded device slave ports for new proposed primary port */
1475                 for (i = 0; i < internals->active_slave_count; i++) {
1476                         if (internals->active_slaves[i] == slave_port_id)
1477                                 internals->current_primary_port = slave_port_id;
1478                 }
1479 }
1480
1481 static void
1482 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1483
1484 static int
1485 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1486 {
1487         struct bond_dev_private *internals;
1488         int i;
1489
1490         /* slave eth devs are started by the bonded device, never directly */
1491         if (valid_bonded_ethdev(eth_dev)) {
1492                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1493                                 eth_dev->data->port_id);
1494                 return -1;
1495         }
1496
1497         eth_dev->data->dev_link.link_status = 0;
1498         eth_dev->data->dev_started = 1;
1499
1500         internals = eth_dev->data->dev_private;
1501
1502         if (internals->slave_count == 0) {
1503                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1504                 return -1;
1505         }
1506
1507         if (internals->user_defined_mac == 0) {
1508                 struct ether_addr *new_mac_addr = NULL;
1509
1510                 for (i = 0; i < internals->slave_count; i++)
1511                         if (internals->slaves[i].port_id == internals->primary_port)
1512                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1513
1514                 if (new_mac_addr == NULL)
1515                         return -1;
1516
1517                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1518                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1519                                         eth_dev->data->port_id);
1520                         return -1;
1521                 }
1522         }
1523
1524         /* Update all slave devices MACs*/
1525         if (mac_address_slaves_update(eth_dev) != 0)
1526                 return -1;
1527
1528         /* If bonded device is configured in promiscuous mode then re-apply config */
1529         if (internals->promiscuous_en)
1530                 bond_ethdev_promiscuous_enable(eth_dev);
1531
1532         /* Reconfigure each slave device if starting bonded device */
1533         for (i = 0; i < internals->slave_count; i++) {
1534                 if (slave_configure(eth_dev,
1535                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1536                         RTE_BOND_LOG(ERR,
1537                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1538                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1539                         return -1;
1540                 }
1541         }
1542
1543         if (internals->user_defined_primary_port)
1544                 bond_ethdev_primary_set(internals, internals->primary_port);
1545
1546         if (internals->mode == BONDING_MODE_8023AD)
1547                 bond_mode_8023ad_start(eth_dev);
1548
1549         if (internals->mode == BONDING_MODE_TLB ||
1550                         internals->mode == BONDING_MODE_ALB)
1551                 bond_tlb_enable(internals);
1552
1553         return 0;
1554 }
1555
1556 static void
1557 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1558 {
1559         uint8_t i;
1560
1561         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1562                 rte_free(dev->data->rx_queues[i]);
1563                 dev->data->rx_queues[i] = NULL;
1564         }
1565         dev->data->nb_rx_queues = 0;
1566
1567         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1568                 rte_free(dev->data->tx_queues[i]);
1569                 dev->data->tx_queues[i] = NULL;
1570         }
1571         dev->data->nb_tx_queues = 0;
1572 }
1573
1574 void
1575 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1576 {
1577         struct bond_dev_private *internals = eth_dev->data->dev_private;
1578         uint8_t i;
1579
1580         if (internals->mode == BONDING_MODE_8023AD) {
1581                 struct port *port;
1582                 void *pkt = NULL;
1583
1584                 bond_mode_8023ad_stop(eth_dev);
1585
1586                 /* Discard all messages to/from mode 4 state machines */
1587                 for (i = 0; i < internals->active_slave_count; i++) {
1588                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1589
1590                         RTE_VERIFY(port->rx_ring != NULL);
1591                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1592                                 rte_pktmbuf_free(pkt);
1593
1594                         RTE_VERIFY(port->tx_ring != NULL);
1595                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1596                                 rte_pktmbuf_free(pkt);
1597                 }
1598         }
1599
1600         if (internals->mode == BONDING_MODE_TLB ||
1601                         internals->mode == BONDING_MODE_ALB) {
1602                 bond_tlb_disable(internals);
1603                 for (i = 0; i < internals->active_slave_count; i++)
1604                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1605         }
1606
1607         internals->active_slave_count = 0;
1608         internals->link_status_polling_enabled = 0;
1609
1610         eth_dev->data->dev_link.link_status = 0;
1611         eth_dev->data->dev_started = 0;
1612 }
1613
1614 void
1615 bond_ethdev_close(struct rte_eth_dev *dev)
1616 {
1617         bond_ethdev_free_queues(dev);
1618 }
1619
1620 /* forward declaration */
1621 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1622
1623 static void
1624 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1625 {
1626         struct bond_dev_private *internals = dev->data->dev_private;
1627
1628         dev_info->max_mac_addrs = 1;
1629
1630         dev_info->max_rx_pktlen = (uint32_t)2048;
1631
1632         dev_info->max_rx_queues = (uint16_t)128;
1633         dev_info->max_tx_queues = (uint16_t)512;
1634
1635         dev_info->min_rx_bufsize = 0;
1636         dev_info->pci_dev = dev->pci_dev;
1637
1638         dev_info->rx_offload_capa = internals->rx_offload_capa;
1639         dev_info->tx_offload_capa = internals->tx_offload_capa;
1640         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1641
1642         dev_info->reta_size = internals->reta_size;
1643 }
1644
1645 static int
1646 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1647                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1648                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1649 {
1650         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1651                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1652                                         0, dev->pci_dev->numa_node);
1653         if (bd_rx_q == NULL)
1654                 return -1;
1655
1656         bd_rx_q->queue_id = rx_queue_id;
1657         bd_rx_q->dev_private = dev->data->dev_private;
1658
1659         bd_rx_q->nb_rx_desc = nb_rx_desc;
1660
1661         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1662         bd_rx_q->mb_pool = mb_pool;
1663
1664         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1665
1666         return 0;
1667 }
1668
1669 static int
1670 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1671                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1672                 const struct rte_eth_txconf *tx_conf)
1673 {
1674         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1675                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1676                                         0, dev->pci_dev->numa_node);
1677
1678         if (bd_tx_q == NULL)
1679                 return -1;
1680
1681         bd_tx_q->queue_id = tx_queue_id;
1682         bd_tx_q->dev_private = dev->data->dev_private;
1683
1684         bd_tx_q->nb_tx_desc = nb_tx_desc;
1685         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1686
1687         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1688
1689         return 0;
1690 }
1691
1692 static void
1693 bond_ethdev_rx_queue_release(void *queue)
1694 {
1695         if (queue == NULL)
1696                 return;
1697
1698         rte_free(queue);
1699 }
1700
1701 static void
1702 bond_ethdev_tx_queue_release(void *queue)
1703 {
1704         if (queue == NULL)
1705                 return;
1706
1707         rte_free(queue);
1708 }
1709
1710 static void
1711 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1712 {
1713         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1714         struct bond_dev_private *internals;
1715
1716         /* Default "polling slave found" to true so that polling is not
1717          * disabled if we cannot take the lock */
1718         int i, polling_slave_found = 1;
1719
1720         if (cb_arg == NULL)
1721                 return;
1722
1723         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1724         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1725
1726         if (!bonded_ethdev->data->dev_started ||
1727                 !internals->link_status_polling_enabled)
1728                 return;
1729
1730         /* If the device is currently being configured then don't check the
1731          * slaves' link status; wait until the next period */
1732         if (rte_spinlock_trylock(&internals->lock)) {
1733                 if (internals->slave_count > 0)
1734                         polling_slave_found = 0;
1735
1736                 for (i = 0; i < internals->slave_count; i++) {
1737                         if (!internals->slaves[i].link_status_poll_enabled)
1738                                 continue;
1739
1740                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1741                         polling_slave_found = 1;
1742
1743                         /* Update slave link status */
1744                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1745                                         internals->slaves[i].link_status_wait_to_complete);
1746
1747                         /* if link status has changed since last checked then call lsc
1748                          * event callback */
1749                         if (slave_ethdev->data->dev_link.link_status !=
1750                                         internals->slaves[i].last_link_status) {
1751                                 internals->slaves[i].last_link_status =
1752                                                 slave_ethdev->data->dev_link.link_status;
1753
1754                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1755                                                 RTE_ETH_EVENT_INTR_LSC,
1756                                                 &bonded_ethdev->data->port_id);
1757                         }
1758                 }
1759                 rte_spinlock_unlock(&internals->lock);
1760         }
1761
1762         if (polling_slave_found)
1763                 /* Set alarm to continue monitoring link status of slave ethdevs */
1764                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1765                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1766 }
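/*
 * The monitor above re-arms itself with rte_eal_alarm_set(), yielding a
 * periodic callback without a dedicated thread. The same pattern in
 * isolation (sketch, not driver code):
 *
 *   static void tick(void *arg) {
 *       // ... periodic work ...
 *       rte_eal_alarm_set(10 * 1000, tick, arg);    // re-arm in 10 ms (arg in usecs)
 *   }
 *   rte_eal_alarm_set(10 * 1000, tick, NULL);       // initial shot
 */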
1767
1768 static int
1769 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1770                 int wait_to_complete)
1771 {
1772         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1773
1774         if (!bonded_eth_dev->data->dev_started ||
1775                 internals->active_slave_count == 0) {
1776                 bonded_eth_dev->data->dev_link.link_status = 0;
1777                 return 0;
1778         } else {
1779                 struct rte_eth_dev *slave_eth_dev;
1780                 int i, link_up = 0;
1781
1782                 for (i = 0; i < internals->active_slave_count; i++) {
1783                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1784
1785                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1786                                         wait_to_complete);
1787                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1788                                 link_up = 1;
1789                                 break;
1790                         }
1791                 }
1792
1793                 bonded_eth_dev->data->dev_link.link_status = link_up;
1794         }
1795
1796         return 0;
1797 }
1798
1799 static void
1800 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1801 {
1802         struct bond_dev_private *internals = dev->data->dev_private;
1803         struct rte_eth_stats slave_stats;
1804         int i;
1805
1806         for (i = 0; i < internals->slave_count; i++) {
1807                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1808
1809                 stats->ipackets += slave_stats.ipackets;
1810                 stats->opackets += slave_stats.opackets;
1811                 stats->ibytes += slave_stats.ibytes;
1812                 stats->obytes += slave_stats.obytes;
1813                 stats->ierrors += slave_stats.ierrors;
1814                 stats->oerrors += slave_stats.oerrors;
1815                 stats->imcasts += slave_stats.imcasts;
1816                 stats->rx_nombuf += slave_stats.rx_nombuf;
1817                 stats->fdirmatch += slave_stats.fdirmatch;
1818                 stats->fdirmiss += slave_stats.fdirmiss;
1819                 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1820                 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1821                 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1822                 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1823         }
1824 }
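/*
 * The bonded port's counters are the sum over all slaves, so the standard
 * ethdev call suffices (sketch; 'bond_port' illustrative):
 *
 *   struct rte_eth_stats stats;
 *   rte_eth_stats_get(bond_port, &stats);
 *   // stats.ipackets/stats.opackets now aggregate every slave's traffic
 */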
1825
1826 static void
1827 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1828 {
1829         struct bond_dev_private *internals = dev->data->dev_private;
1830         int i;
1831
1832         for (i = 0; i < internals->slave_count; i++)
1833                 rte_eth_stats_reset(internals->slaves[i].port_id);
1834 }
1835
1836 static void
1837 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1838 {
1839         struct bond_dev_private *internals = eth_dev->data->dev_private;
1840         int i;
1841
1842         internals->promiscuous_en = 1;
1843
1844         switch (internals->mode) {
1845         /* Promiscuous mode is propagated to all slaves */
1846         case BONDING_MODE_ROUND_ROBIN:
1847         case BONDING_MODE_BALANCE:
1848         case BONDING_MODE_BROADCAST:
1849                 for (i = 0; i < internals->slave_count; i++)
1850                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1851                 break;
1852         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1853         case BONDING_MODE_8023AD:
1854                 break;
1855         /* Promiscuous mode is propagated only to primary slave */
1856         case BONDING_MODE_ACTIVE_BACKUP:
1857         case BONDING_MODE_TLB:
1858         case BONDING_MODE_ALB:
1859         default:
1860                 rte_eth_promiscuous_enable(internals->current_primary_port);
1861         }
1862 }
1863
1864 static void
1865 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1866 {
1867         struct bond_dev_private *internals = dev->data->dev_private;
1868         int i;
1869
1870         internals->promiscuous_en = 0;
1871
1872         switch (internals->mode) {
1873         /* Promiscuous mode is propagated to all slaves */
1874         case BONDING_MODE_ROUND_ROBIN:
1875         case BONDING_MODE_BALANCE:
1876         case BONDING_MODE_BROADCAST:
1877                 for (i = 0; i < internals->slave_count; i++)
1878                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1879                 break;
1880         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1881         case BONDING_MODE_8023AD:
1882                 break;
1883         /* Promiscuous mode is propagated only to primary slave */
1884         case BONDING_MODE_ACTIVE_BACKUP:
1885         case BONDING_MODE_TLB:
1886         case BONDING_MODE_ALB:
1887         default:
1888                 rte_eth_promiscuous_disable(internals->current_primary_port);
1889         }
1890 }
1891
1892 static void
1893 bond_ethdev_delayed_lsc_propagation(void *arg)
1894 {
1895         if (arg == NULL)
1896                 return;
1897
1898         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1899                         RTE_ETH_EVENT_INTR_LSC);
1900 }
1901
1902 void
1903 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1904                 void *param)
1905 {
1906         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1907         struct bond_dev_private *internals;
1908         struct rte_eth_link link;
1909
1910         int i, valid_slave = 0;
1911         uint8_t active_pos;
1912         uint8_t lsc_flag = 0;
1913
1914         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1915                 return;
1916
1917         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1918         slave_eth_dev = &rte_eth_devices[port_id];
1919
1920         if (valid_bonded_ethdev(bonded_eth_dev))
1921                 return;
1922
1923         internals = bonded_eth_dev->data->dev_private;
1924
1925         /* If the device isn't started don't handle interrupts */
1926         if (!bonded_eth_dev->data->dev_started)
1927                 return;
1928
1929         /* verify that port_id is a valid slave of bonded port */
1930         for (i = 0; i < internals->slave_count; i++) {
1931                 if (internals->slaves[i].port_id == port_id) {
1932                         valid_slave = 1;
1933                         break;
1934                 }
1935         }
1936
1937         if (!valid_slave)
1938                 return;
1939
1940         /* Search for port in active port list */
1941         active_pos = find_slave_by_id(internals->active_slaves,
1942                         internals->active_slave_count, port_id);
1943
1944         rte_eth_link_get_nowait(port_id, &link);
1945         if (link.link_status) {
1946                 if (active_pos < internals->active_slave_count)
1947                         return;
1948
1949                 /* if no active slave ports then set this port to be primary port */
1950                 if (internals->active_slave_count < 1) {
1951                         /* If first active slave, then change link status */
1952                         bonded_eth_dev->data->dev_link.link_status = 1;
1953                         internals->current_primary_port = port_id;
1954                         lsc_flag = 1;
1955
1956                         mac_address_slaves_update(bonded_eth_dev);
1957
1958                         /* Inherit eth dev link properties from first active slave */
1959                         link_properties_set(bonded_eth_dev,
1960                                         &(slave_eth_dev->data->dev_link));
1961                 }
1962
1963                 activate_slave(bonded_eth_dev, port_id);
1964
1965                 /* If user has defined the primary port then default to using it */
1966                 if (internals->user_defined_primary_port &&
1967                                 internals->primary_port == port_id)
1968                         bond_ethdev_primary_set(internals, port_id);
1969         } else {
1970                 if (active_pos == internals->active_slave_count)
1971                         return;
1972
1973                 /* Remove from active slave list */
1974                 deactivate_slave(bonded_eth_dev, port_id);
1975
1976                 /* No active slaves, change link status to down and reset other
1977                  * link properties */
1978                 if (internals->active_slave_count < 1) {
1979                         lsc_flag = 1;
1980                         bonded_eth_dev->data->dev_link.link_status = 0;
1981
1982                         link_properties_reset(bonded_eth_dev);
1983                 }
1984
1985                 /* Update primary id: take the first active slave from the list,
1986                  * or fall back to the configured primary port if none is active */
1987                 if (port_id == internals->current_primary_port) {
1988                         if (internals->active_slave_count > 0)
1989                                 bond_ethdev_primary_set(internals,
1990                                                 internals->active_slaves[0]);
1991                         else
1992                                 internals->current_primary_port = internals->primary_port;
1993                 }
1994         }
1995
1996         if (lsc_flag) {
1997                 /* Cancel any possible outstanding interrupts if delays are enabled */
1998                 if (internals->link_up_delay_ms > 0 ||
1999                         internals->link_down_delay_ms > 0)
2000                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2001                                         bonded_eth_dev);
2002
2003                 if (bonded_eth_dev->data->dev_link.link_status) {
2004                         if (internals->link_up_delay_ms > 0)
2005                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2006                                                 bond_ethdev_delayed_lsc_propagation,
2007                                                 (void *)bonded_eth_dev);
2008                         else
2009                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2010                                                 RTE_ETH_EVENT_INTR_LSC);
2011
2012                 } else {
2013                         if (internals->link_down_delay_ms > 0)
2014                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2015                                                 bond_ethdev_delayed_lsc_propagation,
2016                                                 (void *)bonded_eth_dev);
2017                         else
2018                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2019                                                 RTE_ETH_EVENT_INTR_LSC);
2020                 }
2021         }
2022 }
2023
2024 static int
2025 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2026                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2027 {
2028         unsigned i, j;
2029         int result = 0;
2030         int slave_reta_size;
2031         unsigned reta_count;
2032         struct bond_dev_private *internals = dev->data->dev_private;
2033
2034         if (reta_size != internals->reta_size)
2035                 return -EINVAL;
2036
2037         /* Copy RETA table */
2038         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2039
2040         for (i = 0; i < reta_count; i++) {
2041                 internals->reta_conf[i].mask = reta_conf[i].mask;
2042                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2043                         if ((reta_conf[i].mask >> j) & 0x01)
2044                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2045         }
2046
2047         /* Fill rest of array */
2048         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2049                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2050                                 sizeof(internals->reta_conf[0]) * reta_count);
2051
2052         /* Propagate RETA over slaves */
2053         for (i = 0; i < internals->slave_count; i++) {
2054                 slave_reta_size = internals->slaves[i].reta_size;
2055                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2056                                 &internals->reta_conf[0], slave_reta_size);
2057                 if (result < 0)
2058                         return result;
2059         }
2060
2061         return 0;
2062 }
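/*
 * RETA entries come in groups of RTE_RETA_GROUP_SIZE (64): table index k
 * lives in reta_conf[k / 64].reta[k % 64] and is applied only if bit
 * (k % 64) of that group's mask is set. Sketch of an even spread across
 * nb_q queues (hypothetical application code; names illustrative):
 *
 *   struct rte_eth_rss_reta_entry64 conf[reta_size / RTE_RETA_GROUP_SIZE];
 *   memset(conf, 0, sizeof(conf));
 *   for (k = 0; k < reta_size; k++) {
 *       conf[k / RTE_RETA_GROUP_SIZE].mask |= 1ULL << (k % RTE_RETA_GROUP_SIZE);
 *       conf[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] = k % nb_q;
 *   }
 *   rte_eth_dev_rss_reta_update(bond_port, conf, reta_size);
 */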
2063
2064 static int
2065 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2066                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2067 {
2068         int i, j;
2069         struct bond_dev_private *internals = dev->data->dev_private;
2070
2071         if (reta_size != internals->reta_size)
2072                 return -EINVAL;
2073
2074         /* Copy RETA table */
2075         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2076                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2077                         if ((reta_conf[i].mask >> j) & 0x01)
2078                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2079
2080         return 0;
2081 }
2082
2083 static int
2084 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2085                 struct rte_eth_rss_conf *rss_conf)
2086 {
2087         int i, result = 0;
2088         struct bond_dev_private *internals = dev->data->dev_private;
2089         struct rte_eth_rss_conf bond_rss_conf;
2090
2091         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2092
2093         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2094
2095         if (bond_rss_conf.rss_hf != 0)
2096                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2097
2098         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2099                         sizeof(internals->rss_key)) {
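                /* A non-NULL key with rss_key_len == 0 selects the 40-byte default length */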
2100                 if (bond_rss_conf.rss_key_len == 0)
2101                         bond_rss_conf.rss_key_len = 40;
2102                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2103                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2104                                 internals->rss_key_len);
2105         }
2106
2107         for (i = 0; i < internals->slave_count; i++) {
2108                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2109                                 &bond_rss_conf);
2110                 if (result < 0)
2111                         return result;
2112         }
2113
2114         return 0;
2115 }
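/*
 * This op is the dynamic-RSS entry point: the new hash fields/key are
 * recorded on the bonded port and pushed to every slave at runtime.
 * Sketch of narrowing the hash to TCP flows (hypothetical application
 * code; 'bond_port' illustrative):
 *
 *   struct rte_eth_rss_conf conf = {
 *       .rss_hf = ETH_RSS_TCP,
 *       .rss_key = NULL,            // keep the currently programmed key
 *   };
 *   rte_eth_dev_rss_hash_update(bond_port, &conf);
 */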
2116
2117 static int
2118 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2119                 struct rte_eth_rss_conf *rss_conf)
2120 {
2121         struct bond_dev_private *internals = dev->data->dev_private;
2122
2123         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2124         rss_conf->rss_key_len = internals->rss_key_len;
2125         if (rss_conf->rss_key)
2126                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2127
2128         return 0;
2129 }
2130
2131 struct eth_dev_ops default_dev_ops = {
2132                 .dev_start            = bond_ethdev_start,
2133                 .dev_stop             = bond_ethdev_stop,
2134                 .dev_close            = bond_ethdev_close,
2135                 .dev_configure        = bond_ethdev_configure,
2136                 .dev_infos_get        = bond_ethdev_info,
2137                 .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2138                 .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2139                 .rx_queue_release     = bond_ethdev_rx_queue_release,
2140                 .tx_queue_release     = bond_ethdev_tx_queue_release,
2141                 .link_update          = bond_ethdev_link_update,
2142                 .stats_get            = bond_ethdev_stats_get,
2143                 .stats_reset          = bond_ethdev_stats_reset,
2144                 .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2145                 .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2146                 .reta_update          = bond_ethdev_rss_reta_update,
2147                 .reta_query           = bond_ethdev_rss_reta_query,
2148                 .rss_hash_update      = bond_ethdev_rss_hash_update,
2149                 .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2150 };
2151
2152 static int
2153 bond_init(const char *name, const char *params)
2154 {
2155         struct bond_dev_private *internals;
2156         struct rte_kvargs *kvlist;
2157         uint8_t bonding_mode, socket_id;
2158         int  arg_count, port_id;
2159
2160         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2161
2162         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2163         if (kvlist == NULL)
2164                 return -1;
2165
2166         /* Parse link bonding mode */
2167         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2168                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2169                                 &bond_ethdev_parse_slave_mode_kvarg,
2170                                 &bonding_mode) != 0) {
2171                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2172                                         name);
2173                         goto parse_error;
2174                 }
2175         } else {
2176                 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2177                                 "device %s\n", name);
2178                 goto parse_error;
2179         }
2180
2181         /* Parse socket id to create bonding device on */
2182         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2183         if (arg_count == 1) {
2184                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2185                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2186                                 != 0) {
2187                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2188                                         "bonded device %s\n", name);
2189                         goto parse_error;
2190                 }
2191         } else if (arg_count > 1) {
2192                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2193                                 "bonded device %s\n", name);
2194                 goto parse_error;
2195         } else {
2196                 socket_id = rte_socket_id();
2197         }
2198
2199         /* Create link bonding eth device */
2200         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2201         if (port_id < 0) {
2202                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2203                                 "socket %u.\n", name, bonding_mode, socket_id);
2204                 goto parse_error;
2205         }
2206         internals = rte_eth_devices[port_id].data->dev_private;
2207         internals->kvlist = kvlist;
2208
2209         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2210                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2211         return 0;
2212
2213 parse_error:
2214         rte_kvargs_free(kvlist);
2215
2216         return -1;
2217 }
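/*
 * bond_init() is reached via an EAL --vdev argument. A typical command
 * line (values illustrative; kvarg names are those accepted in
 * pmd_bond_init_valid_arguments):
 *
 *   ./app -c 0xf -n 4 \
 *       --vdev 'eth_bond0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,socket_id=0'
 */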
2218
2219 static int
2220 bond_uninit(const char *name)
2221 {
2222         int  ret;
2223
2224         if (name == NULL)
2225                 return -EINVAL;
2226
2227         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2228
2229         /* free link bonding eth device */
2230         ret = rte_eth_bond_free(name);
2231         if (ret < 0)
2232                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2233
2234         return ret;
2235 }
2236
2237 /* this part will resolve the slave port ids after all the other pdevs and vdevs
2238  * have been allocated */
2239 static int
2240 bond_ethdev_configure(struct rte_eth_dev *dev)
2241 {
2242         char *name = dev->data->name;
2243         struct bond_dev_private *internals = dev->data->dev_private;
2244         struct rte_kvargs *kvlist = internals->kvlist;
2245         int arg_count;
2246         uint8_t port_id = dev - rte_eth_devices;
2247
2248         static const uint8_t default_rss_key[40] = {
2249                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2250                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2251                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2252                 0xBE, 0xAC, 0x01, 0xFA
2253         };
2254
2255         unsigned i, j;
2256
2257         /* If RSS is enabled, fill table and key with default values */
2258         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2259                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2260                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2261                 memcpy(internals->rss_key, default_rss_key, 40);
2262
2263                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2264                         internals->reta_conf[i].mask = ~0LL;
2265                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2266                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2267                 }
2268         }
2269
2270         /*
2271          * if no kvlist, it means that this bonded device has been created
2272          * through the bonding api.
2273          */
2274         if (!kvlist)
2275                 return 0;
2276
2277         /* Parse MAC address for bonded device */
2278         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2279         if (arg_count == 1) {
2280                 struct ether_addr bond_mac;
2281
2282                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2283                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2284                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2285                                         name);
2286                         return -1;
2287                 }
2288
2289                 /* Set MAC address */
2290                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2291                         RTE_LOG(ERR, EAL,
2292                                         "Failed to set mac address on bonded device %s\n",
2293                                         name);
2294                         return -1;
2295                 }
2296         } else if (arg_count > 1) {
2297                 RTE_LOG(ERR, EAL,
2298                                 "MAC address can be specified only once for bonded device %s\n",
2299                                 name);
2300                 return -1;
2301         }
2302
2303         /* Parse/set balance mode transmit policy */
2304         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2305         if (arg_count == 1) {
2306                 uint8_t xmit_policy;
2307
2308                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2309                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2310                                                 0) {
2311                         RTE_LOG(INFO, EAL,
2312                                         "Invalid xmit policy specified for bonded device %s\n",
2313                                         name);
2314                         return -1;
2315                 }
2316
2317                 /* Set balance mode transmit policy */
2318                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2319                         RTE_LOG(ERR, EAL,
2320                                         "Failed to set balance xmit policy on bonded device %s\n",
2321                                         name);
2322                         return -1;
2323                 }
2324         } else if (arg_count > 1) {
2325                 RTE_LOG(ERR, EAL,
2326                                 "Transmit policy can be specified only once for bonded device"
2327                                 " %s\n", name);
2328                 return -1;
2329         }
2330
2331         /* Parse/add slave ports to bonded device */
2332         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2333                 struct bond_ethdev_slave_ports slave_ports;
2334                 unsigned i;
2335
2336                 memset(&slave_ports, 0, sizeof(slave_ports));
2337
2338                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2339                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2340                         RTE_LOG(ERR, EAL,
2341                                         "Failed to parse slave ports for bonded device %s\n",
2342                                         name);
2343                         return -1;
2344                 }
2345
2346                 for (i = 0; i < slave_ports.slave_count; i++) {
2347                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2348                                 RTE_LOG(ERR, EAL,
2349                                                 "Failed to add port %d as slave to bonded device %s\n",
2350                                                 slave_ports.slaves[i], name);
2351                         }
2352                 }
2353
2354         } else {
2355                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2356                 return -1;
2357         }
2358
2359         /* Parse/set primary slave port id*/
2360         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2361         if (arg_count == 1) {
2362                 uint8_t primary_slave_port_id;
2363
2364                 if (rte_kvargs_process(kvlist,
2365                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2366                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2367                                 &primary_slave_port_id) < 0) {
2368                         RTE_LOG(INFO, EAL,
2369                                         "Invalid primary slave port id specified for bonded device"
2370                                         " %s\n", name);
2371                         return -1;
2372                 }
2373
2374                 /* Set primary slave port id */
2375                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2376                                 != 0) {
2377                         RTE_LOG(ERR, EAL,
2378                                         "Failed to set primary slave port %d on bonded device %s\n",
2379                                         primary_slave_port_id, name);
2380                         return -1;
2381                 }
2382         } else if (arg_count > 1) {
2383                 RTE_LOG(INFO, EAL,
2384                                 "Primary slave can be specified only once for bonded device"
2385                                 " %s\n", name);
2386                 return -1;
2387         }
2388
2389         /* Parse link status monitor polling interval */
2390         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2391         if (arg_count == 1) {
2392                 uint32_t lsc_poll_interval_ms;
2393
2394                 if (rte_kvargs_process(kvlist,
2395                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2396                                 &bond_ethdev_parse_time_ms_kvarg,
2397                                 &lsc_poll_interval_ms) < 0) {
2398                         RTE_LOG(INFO, EAL,
2399                                         "Invalid lsc polling interval value specified for bonded"
2400                                         " device %s\n", name);
2401                         return -1;
2402                 }
2403
2404                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2405                                 != 0) {
2406                         RTE_LOG(ERR, EAL,
2407                                         "Failed to set lsc monitor polling interval (%u ms) on"
2408                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2409                         return -1;
2410                 }
2411         } else if (arg_count > 1) {
2412                 RTE_LOG(INFO, EAL,
2413                                 "LSC polling interval can be specified only once for bonded"
2414                                 " device %s\n", name);
2415                 return -1;
2416         }
2417
2418         /* Parse link up interrupt propagation delay */
2419         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2420         if (arg_count == 1) {
2421                 uint32_t link_up_delay_ms;
2422
2423                 if (rte_kvargs_process(kvlist,
2424                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2425                                 &bond_ethdev_parse_time_ms_kvarg,
2426                                 &link_up_delay_ms) < 0) {
2427                         RTE_LOG(INFO, EAL,
2428                                         "Invalid link up propagation delay value specified for"
2429                                         " bonded device %s\n", name);
2430                         return -1;
2431                 }
2432
2433                 /* Set link up propagation delay */
2434                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2435                                 != 0) {
2436                         RTE_LOG(ERR, EAL,
2437                                         "Failed to set link up propagation delay (%u ms) on bonded"
2438                                         " device %s\n", link_up_delay_ms, name);
2439                         return -1;
2440                 }
2441         } else if (arg_count > 1) {
2442                 RTE_LOG(INFO, EAL,
2443                                 "Link up propagation delay can be specified only once for"
2444                                 " bonded device %s\n", name);
2445                 return -1;
2446         }
2447
2448         /* Parse link down interrupt propagation delay */
2449         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2450         if (arg_count == 1) {
2451                 uint32_t link_down_delay_ms;
2452
2453                 if (rte_kvargs_process(kvlist,
2454                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2455                                 &bond_ethdev_parse_time_ms_kvarg,
2456                                 &link_down_delay_ms) < 0) {
2457                         RTE_LOG(INFO, EAL,
2458                                         "Invalid link down propagation delay value specified for"
2459                                         " bonded device %s\n", name);
2460                         return -1;
2461                 }
2462
2463                 /* Set link down propagation delay */
2464                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2465                                 != 0) {
2466                         RTE_LOG(ERR, EAL,
2467                                         "Failed to set link down propagation delay (%u ms) on"
2468                                         " bonded device %s\n", link_down_delay_ms, name);
2469                         return -1;
2470                 }
2471         } else if (arg_count > 1) {
2472                 RTE_LOG(INFO, EAL,
2473                                 "Link down propagation delay can be specified only once for"
2474                                 " bonded device %s\n", name);
2475                 return -1;
2476         }
2477
2478         return 0;
2479 }
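/*
 * The optional kvargs parsed above combine into a single --vdev string,
 * e.g. (values illustrative):
 *
 *   --vdev 'eth_bond0,mode=1,slave=0000:02:00.0,slave=0000:03:00.0,
 *           primary=0000:02:00.0,mac=00:11:22:33:44:55,
 *           lsc_poll_period_ms=100,up_delay=10,down_delay=50'
 */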
2480
2481 static struct rte_driver bond_drv = {
2482         .name = "eth_bond",
2483         .type = PMD_VDEV,
2484         .init = bond_init,
2485         .uninit = bond_uninit,
2486 };
2487
2488 PMD_REGISTER_DRIVER(bond_drv);