/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}
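
/*
 * Illustrative sketch (not part of the driver): callers pair
 * get_vlan_offset() with rte_pktmbuf_mtod() to find the L3 header behind
 * up to two VLAN tags, as the xmit hash helpers below do. The mbuf "m"
 * is a hypothetical received packet.
 *
 *        struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *        uint16_t proto = eth->ether_type;
 *        size_t off = get_vlan_offset(eth, &proto);
 *
 *        if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *                struct ipv4_hdr *ip = (struct ipv4_hdr *)
 *                                ((char *)(eth + 1) + off);
 *                ... inspect ip->src_addr / ip->dst_addr ...
 *        }
 */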

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}
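
/*
 * Usage sketch (an assumption about the caller, not code from this
 * file): an application polls the bonded port like any other ethdev, and
 * this burst handler fans the request out over the active slaves.
 * "bond_port_id" and the burst size are hypothetical.
 *
 *        struct rte_mbuf *pkts[32];
 *        uint16_t i, n = rte_eth_rx_burst(bond_port_id, 0, pkts, 32);
 *
 *        for (i = 0; i < n; i++)
 *                rte_pktmbuf_free(pkts[i]);
 */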

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint8_t slaves[RTE_MAX_ETHPORTS];
        uint8_t slave_count;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        /* Remove the packet from the array if it is a slow
                         * packet, or the slave is not in collecting state, or
                         * the bonding interface is not in promiscuous mode and
                         * the packet's destination address does not match. */
                        if (unlikely(hdr->ether_type == ether_type_slow_be ||
                                !collecting || (!promisc &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
                                                bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
        }

        return num_rx_total;
}
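
/*
 * The loop above is a filter-and-compact idiom: rejected mbufs are
 * removed with memmove() so accepted packets stay contiguous at the
 * front of bufs[]. A minimal sketch of the same idiom, with the
 * predicate "wanted" and sink "consume_or_free" purely hypothetical:
 *
 *        uint16_t j = 0;
 *        while (j < total) {
 *                if (!wanted(bufs[j])) {
 *                        consume_or_free(bufs[j]);
 *                        total--;
 *                        if (j < total)
 *                                memmove(&bufs[j], &bufs[j + 1],
 *                                        sizeof(bufs[0]) * (total - j));
 *                } else
 *                        j++;
 *        }
 */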

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint8_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Just update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an rx path, so cast to the rx queue structure */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}
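
/*
 * Note on the failure path above (a sketch based on rte_eth_tx_burst()
 * semantics, not additional driver code): packets a slave did not accept
 * are copied back to the tail of bufs[], so the caller keeps ownership
 * of exactly nb_pkts - num_tx_total mbufs and may retry or free them.
 * "bond_port_id" is hypothetical:
 *
 *        uint16_t i, sent = rte_eth_tx_burst(bond_port_id, 0, bufs, nb);
 *
 *        for (i = sent; i < nb; i++)
 *                rte_pktmbuf_free(bufs[i]);
 */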

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                /* there is no L4 header in a fragmented packet */
                if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
                        ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                        if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}
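
/*
 * How the xmit policies above plug in (assumption consistent with the
 * balance and 802.3ad paths below): internals->xmit_hash points at one
 * of xmit_l2_hash/xmit_l23_hash/xmit_l34_hash and maps every mbuf onto
 * one of slave_count slaves:
 *
 *        op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
 *        slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
 */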

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
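
/*
 * Worked example (assumed numbers, not from this file): a 10G slave
 * reports link_speed == 10000 (Mbps), so its capacity above starts at
 * 10000 * 1000000 / 8 = 1.25e9 bytes/s, then is scaled to the
 * accumulated (update_idx + 1) * REORDER_PERIOD_MS window. The
 * quotient/remainder pair measures how far the observed load sits below
 * that capacity, and bandwidth_cmp() sorts slaves so the one with the
 * most headroom is tried first by the TLB transmit path below.
 */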

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint8_t i, j;

        uint8_t num_of_slaves = internals->active_slave_count;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave, plus one extra buffer
         * for packets sent with the TLB policy. In the worst case every
         * packet is sent on a single port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets, as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint8_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
                num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
        /* positions in slaves, not ID */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate additional slots for slow protocol packets in 802.3ad mode. */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate each slave's mbuf array with the packets to be sent on it */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* If slaves fail to transmit packets from the burst, the calling
         * application is not expected to know about multiple references to
         * the packets, so we must handle failures of all packets except
         * those of the most successful slave.
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}
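
/*
 * Sketch of the reference-count contract used above (based on
 * rte_mbuf_refcnt_update() semantics): every mbuf is bumped to one
 * reference per slave before transmission, so each slave's eventual free
 * only drops its own reference:
 *
 *        rte_mbuf_refcnt_update(m, num_of_slaves - 1);   (1 + (n - 1) refs)
 *
 * Each successful transmit releases one reference; the cleanup loop
 * above frees the references left over by failed transmits.
 */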

void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link)
{
        struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (slave_dev_link->link_status &&
                bonded_eth_dev->data->dev_started) {
                bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
                bonded_dev_link->link_speed = slave_dev_link->link_speed;

                internals->link_props_set = 1;
        }
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        memset(&(bonded_eth_dev->data->dev_link), 0,
                        sizeof(bonded_eth_dev->data->dev_link));

        internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
                struct rte_eth_link *slave_dev_link)
{
        if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
                bonded_dev_link->link_speed != slave_dev_link->link_speed)
                return -1;

        return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC is different from the current MAC then update */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (mac_address_set(&rte_eth_devices[internals->primary_port],
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (mac_address_set(
                                                &rte_eth_devices[internals->slaves[i].port_id],
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                RTE_LOG(WARNING, PMD,
                                "Using mode 4, it is necessary to do TX burst and RX burst "
                                "at least every 100ms.\n");
                break;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)
                        return -1;

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}
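
/*
 * Usage sketch (via the public API declared in rte_eth_bond.h): an
 * application selects the mode on an existing bonded device, which ends
 * up here and installs the matching burst handlers. "bond_port_id" is
 * hypothetical:
 *
 *        if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_BALANCE) != 0)
 *                rte_exit(EXIT_FAILURE, "failed to set bonding mode\n");
 */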
1299
1300 int
1301 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1302                 struct rte_eth_dev *slave_eth_dev)
1303 {
1304         struct bond_rx_queue *bd_rx_q;
1305         struct bond_tx_queue *bd_tx_q;
1306
1307         int errval;
1308         uint16_t q_id;
1309
1310         /* Stop slave */
1311         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1312
1313         /* Enable interrupts on slave device if supported */
1314         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1315                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1316
1317         /* If RSS is enabled for bonding, try to enable it for slaves  */
1318         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1319                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1320                                 != 0) {
1321                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1322                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1323                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1324                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1325                 } else {
1326                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1327                 }
1328
1329                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1330                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1331                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1332                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1333         }
1334
1335         /* Configure device */
1336         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1337                         bonded_eth_dev->data->nb_rx_queues,
1338                         bonded_eth_dev->data->nb_tx_queues,
1339                         &(slave_eth_dev->data->dev_conf));
1340         if (errval != 0) {
1341                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1342                                 slave_eth_dev->data->port_id, errval);
1343                 return errval;
1344         }
1345
1346         /* Setup Rx Queues */
1347         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1348                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1349
1350                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1351                                 bd_rx_q->nb_rx_desc,
1352                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1353                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1354                 if (errval != 0) {
1355                         RTE_BOND_LOG(ERR,
1356                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1357                                         slave_eth_dev->data->port_id, q_id, errval);
1358                         return errval;
1359                 }
1360         }
1361
1362         /* Setup Tx Queues */
1363         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1364                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1365
1366                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1367                                 bd_tx_q->nb_tx_desc,
1368                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1369                                 &bd_tx_q->tx_conf);
1370                 if (errval != 0) {
1371                         RTE_BOND_LOG(ERR,
1372                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1373                                         slave_eth_dev->data->port_id, q_id, errval);
1374                         return errval;
1375                 }
1376         }
1377
1378         /* Start device */
1379         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1380         if (errval != 0) {
1381                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1382                                 slave_eth_dev->data->port_id, errval);
1383                 return -1;
1384         }
1385
1386         /* If RSS is enabled for bonding, synchronize RETA */
1387         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1388                 int i;
1389                 struct bond_dev_private *internals;
1390
1391                 internals = bonded_eth_dev->data->dev_private;
1392
1393                 for (i = 0; i < internals->slave_count; i++) {
1394                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1395                                 errval = rte_eth_dev_rss_reta_update(
1396                                                 slave_eth_dev->data->port_id,
1397                                                 &internals->reta_conf[0],
1398                                                 internals->slaves[i].reta_size);
1399                                 if (errval != 0) {
1400                                         RTE_LOG(WARNING, PMD,
1401                                                         "rte_eth_dev_rss_reta_update on slave port %d failed (err %d)."
1402                                                         " RSS configuration for bonding may be inconsistent.\n",
1403                                                         slave_eth_dev->data->port_id, errval);
1404                                 }
1405                                 break;
1406                         }
1407                 }
1408         }
1409
1410         /* If LSC interrupt is enabled, check the slave's initial link status */
1411         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1412                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1413                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1414
1415         return 0;
1416 }
1417
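/* Remove a slave from the bonded device's slave array, shifting the
 * remaining entries down so the array stays contiguous. */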
1418 void
1419 slave_remove(struct bond_dev_private *internals,
1420                 struct rte_eth_dev *slave_eth_dev)
1421 {
1422         uint8_t i;
1423
1424         for (i = 0; i < internals->slave_count; i++)
1425                 if (internals->slaves[i].port_id ==
1426                                 slave_eth_dev->data->port_id)
1427                         break;
1428
1429         if (i < (internals->slave_count - 1))
1430                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1431                                 sizeof(internals->slaves[0]) *
1432                                 (internals->slave_count - i - 1));
1433
1434         internals->slave_count--;
1435 }
1436
1437 static void
1438 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1439
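/* Append a slave to the bonded device's slave array; slaves without LSC
 * interrupt support are marked for link status polling, and the polling
 * alarm is armed the first time such a slave is added. */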
1440 void
1441 slave_add(struct bond_dev_private *internals,
1442                 struct rte_eth_dev *slave_eth_dev)
1443 {
1444         struct bond_slave_details *slave_details =
1445                         &internals->slaves[internals->slave_count];
1446
1447         slave_details->port_id = slave_eth_dev->data->port_id;
1448         slave_details->last_link_status = 0;
1449
1450         /* If slave device doesn't support interrupts then we need to enable
1451          * polling to monitor link status */
1452         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1453                 slave_details->link_status_poll_enabled = 1;
1454
1455                 if (!internals->link_status_polling_enabled) {
1456                         internals->link_status_polling_enabled = 1;
1457
1458                         rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1459                                         bond_ethdev_slave_link_status_change_monitor,
1460                                         (void *)&rte_eth_devices[internals->port_id]);
1461                 }
1462         }
1463
1464         slave_details->link_status_wait_to_complete = 0;
1465         /* Save the slave's current MAC; the primary's is reused as the bonded MAC */
1466         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1467                         sizeof(struct ether_addr));
1468 }
1469
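/* Set the given slave as primary port; when active slaves exist, the
 * proposed port is only accepted if it is one of them. */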
1470 void
1471 bond_ethdev_primary_set(struct bond_dev_private *internals,
1472                 uint8_t slave_port_id)
1473 {
1474         int i;
1475
1476         if (internals->active_slave_count < 1)
1477                 internals->current_primary_port = slave_port_id;
1478         else
1479                 /* Search bonded device slave ports for new proposed primary port */
1480                 for (i = 0; i < internals->active_slave_count; i++) {
1481                         if (internals->active_slaves[i] == slave_port_id)
1482                                 internals->current_primary_port = slave_port_id;
1483                 }
1484 }
1485
1486 static void
1487 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1488
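/* dev_start handler: resolve the bonded MAC address (the primary slave's
 * persisted MAC unless the user supplied one), re-apply MAC and promiscuous
 * settings to all slaves, reconfigure and restart each slave via
 * slave_configure(), then start mode-specific machinery (802.3ad, TLB/ALB). */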
1489 static int
1490 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1491 {
1492         struct bond_dev_private *internals;
1493         int i;
1494
1495         /* slave eth dev will be started by bonded device */
1496         if (check_for_bonded_ethdev(eth_dev)) {
1497                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1498                                 eth_dev->data->port_id);
1499                 return -1;
1500         }
1501
1502         eth_dev->data->dev_link.link_status = 0;
1503         eth_dev->data->dev_started = 1;
1504
1505         internals = eth_dev->data->dev_private;
1506
1507         if (internals->slave_count == 0) {
1508                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1509                 return -1;
1510         }
1511
1512         if (internals->user_defined_mac == 0) {
1513                 struct ether_addr *new_mac_addr = NULL;
1514
1515                 for (i = 0; i < internals->slave_count; i++)
1516                         if (internals->slaves[i].port_id == internals->primary_port)
1517                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1518
1519                 if (new_mac_addr == NULL)
1520                         return -1;
1521
1522                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1523                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1524                                         eth_dev->data->port_id);
1525                         return -1;
1526                 }
1527         }
1528
1529         /* Update all slave devices' MACs */
1530         if (mac_address_slaves_update(eth_dev) != 0)
1531                 return -1;
1532
1533         /* If bonded device is configured in promiscuous mode then re-apply config */
1534         if (internals->promiscuous_en)
1535                 bond_ethdev_promiscuous_enable(eth_dev);
1536
1537         /* Reconfigure each slave device if starting bonded device */
1538         for (i = 0; i < internals->slave_count; i++) {
1539                 if (slave_configure(eth_dev,
1540                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1541                         RTE_BOND_LOG(ERR,
1542                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1543                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1544                         return -1;
1545                 }
1546         }
1547
1548         if (internals->user_defined_primary_port)
1549                 bond_ethdev_primary_set(internals, internals->primary_port);
1550
1551         if (internals->mode == BONDING_MODE_8023AD)
1552                 bond_mode_8023ad_start(eth_dev);
1553
1554         if (internals->mode == BONDING_MODE_TLB ||
1555                         internals->mode == BONDING_MODE_ALB)
1556                 bond_tlb_enable(internals);
1557
1558         return 0;
1559 }
1560
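/* Free all Rx/Tx queue structures allocated for the bonded device. */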
1561 static void
1562 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1563 {
1564         uint8_t i;
1565
1566         if (dev->data->rx_queues != NULL) {
1567                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1568                         rte_free(dev->data->rx_queues[i]);
1569                         dev->data->rx_queues[i] = NULL;
1570                 }
1571                 dev->data->nb_rx_queues = 0;
1572         }
1573
1574         if (dev->data->tx_queues != NULL) {
1575                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1576                         rte_free(dev->data->tx_queues[i]);
1577                         dev->data->tx_queues[i] = NULL;
1578                 }
1579                 dev->data->nb_tx_queues = 0;
1580         }
1581 }
1582
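/* dev_stop handler: stop the mode 4 state machines and drain their rings,
 * clear the TLB byte counters, empty the active slave list and mark the
 * bonded link as down. */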
1583 void
1584 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1585 {
1586         struct bond_dev_private *internals = eth_dev->data->dev_private;
1587         uint8_t i;
1588
1589         if (internals->mode == BONDING_MODE_8023AD) {
1590                 struct port *port;
1591                 void *pkt = NULL;
1592
1593                 bond_mode_8023ad_stop(eth_dev);
1594
1595                 /* Discard all messages to/from mode 4 state machines */
1596                 for (i = 0; i < internals->active_slave_count; i++) {
1597                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1598
1599                         RTE_VERIFY(port->rx_ring != NULL);
1600                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1601                                 rte_pktmbuf_free(pkt);
1602
1603                         RTE_VERIFY(port->tx_ring != NULL);
1604                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1605                                 rte_pktmbuf_free(pkt);
1606                 }
1607         }
1608
1609         if (internals->mode == BONDING_MODE_TLB ||
1610                         internals->mode == BONDING_MODE_ALB) {
1611                 bond_tlb_disable(internals);
1612                 for (i = 0; i < internals->active_slave_count; i++)
1613                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1614         }
1615
1616         internals->active_slave_count = 0;
1617         internals->link_status_polling_enabled = 0;
1618
1619         eth_dev->data->dev_link.link_status = 0;
1620         eth_dev->data->dev_started = 0;
1621 }
1622
1623 void
1624 bond_ethdev_close(struct rte_eth_dev *dev)
1625 {
1626         bond_ethdev_free_queues(dev);
1627 }
1628
1629 /* forward declaration */
1630 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1631
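/* dev_infos_get handler: report fixed defaults for the bonded device
 * together with the offload and RSS capabilities accumulated from its
 * slaves. */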
1632 static void
1633 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1634 {
1635         struct bond_dev_private *internals = dev->data->dev_private;
1636
1637         dev_info->max_mac_addrs = 1;
1638
1639         dev_info->max_rx_pktlen = (uint32_t)2048;
1640
1641         dev_info->max_rx_queues = (uint16_t)128;
1642         dev_info->max_tx_queues = (uint16_t)512;
1643
1644         dev_info->min_rx_bufsize = 0;
1645         dev_info->pci_dev = NULL;
1646
1647         dev_info->rx_offload_capa = internals->rx_offload_capa;
1648         dev_info->tx_offload_capa = internals->tx_offload_capa;
1649         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1650
1651         dev_info->reta_size = internals->reta_size;
1652 }
1653
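/* rx_queue_setup handler: allocate a bond_rx_queue that records the
 * caller's configuration; the slaves' queues themselves are set up later
 * in slave_configure(). */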
1654 static int
1655 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1656                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1657                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1658 {
1659         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1660                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1661                                         0, dev->data->numa_node);
1662         if (bd_rx_q == NULL)
1663                 return -1;
1664
1665         bd_rx_q->queue_id = rx_queue_id;
1666         bd_rx_q->dev_private = dev->data->dev_private;
1667
1668         bd_rx_q->nb_rx_desc = nb_rx_desc;
1669
1670         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1671         bd_rx_q->mb_pool = mb_pool;
1672
1673         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1674
1675         return 0;
1676 }
1677
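/* tx_queue_setup handler: allocate a bond_tx_queue recording the caller's
 * configuration, mirroring the Rx queue setup above. */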
1678 static int
1679 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1680                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1681                 const struct rte_eth_txconf *tx_conf)
1682 {
1683         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
1684                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1685                                         0, dev->data->numa_node);
1686
1687         if (bd_tx_q == NULL)
1688                 return -1;
1689
1690         bd_tx_q->queue_id = tx_queue_id;
1691         bd_tx_q->dev_private = dev->data->dev_private;
1692
1693         bd_tx_q->nb_tx_desc = nb_tx_desc;
1694         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1695
1696         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1697
1698         return 0;
1699 }
1700
1701 static void
1702 bond_ethdev_rx_queue_release(void *queue)
1703 {
1704         if (queue == NULL)
1705                 return;
1706
1707         rte_free(queue);
1708 }
1709
1710 static void
1711 bond_ethdev_tx_queue_release(void *queue)
1712 {
1713         if (queue == NULL)
1714                 return;
1715
1716         rte_free(queue);
1717 }
1718
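/* Periodic EAL alarm callback: poll the link status of slaves that lack
 * LSC interrupt support and emit an LSC event on any change; re-arms
 * itself while any slave still requires polling. */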
1719 static void
1720 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1721 {
1722         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1723         struct bond_dev_private *internals;
1724
1725         /* Default value for polling slave found is true as we don't want to
1726          * disable the polling thread if we cannot get the lock */
1727         int i, polling_slave_found = 1;
1728
1729         if (cb_arg == NULL)
1730                 return;
1731
1732         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1733         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1734
1735         if (!bonded_ethdev->data->dev_started ||
1736                 !internals->link_status_polling_enabled)
1737                 return;
1738
1739         /* If device is currently being configured then don't check slaves link
1740          * status, wait until next period */
1741         if (rte_spinlock_trylock(&internals->lock)) {
1742                 if (internals->slave_count > 0)
1743                         polling_slave_found = 0;
1744
1745                 for (i = 0; i < internals->slave_count; i++) {
1746                         if (!internals->slaves[i].link_status_poll_enabled)
1747                                 continue;
1748
1749                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1750                         polling_slave_found = 1;
1751
1752                         /* Update slave link status */
1753                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1754                                         internals->slaves[i].link_status_wait_to_complete);
1755
1756                         /* if link status has changed since last checked then call lsc
1757                          * event callback */
1758                         if (slave_ethdev->data->dev_link.link_status !=
1759                                         internals->slaves[i].last_link_status) {
1760                                 internals->slaves[i].last_link_status =
1761                                                 slave_ethdev->data->dev_link.link_status;
1762
1763                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1764                                                 RTE_ETH_EVENT_INTR_LSC,
1765                                                 &bonded_ethdev->data->port_id);
1766                         }
1767                 }
1768                 rte_spinlock_unlock(&internals->lock);
1769         }
1770
1771         if (polling_slave_found)
1772                 /* Set alarm to continue monitoring link status of slave ethdevs */
1773                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1774                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1775 }
1776
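/* link_update handler: the bonded link is reported up if at least one
 * active slave reports link up. */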
1777 static int
1778 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1779                 int wait_to_complete)
1780 {
1781         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1782
1783         if (!bonded_eth_dev->data->dev_started ||
1784                 internals->active_slave_count == 0) {
1785                 bonded_eth_dev->data->dev_link.link_status = 0;
1786                 return 0;
1787         } else {
1788                 struct rte_eth_dev *slave_eth_dev;
1789                 int i, link_up = 0;
1790
1791                 for (i = 0; i < internals->active_slave_count; i++) {
1792                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1793
1794                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1795                                         wait_to_complete);
1796                         if (slave_eth_dev->data->dev_link.link_status == 1) {
1797                                 link_up = 1;
1798                                 break;
1799                         }
1800                 }
1801
1802                 bonded_eth_dev->data->dev_link.link_status = link_up;
1803         }
1804
1805         return 0;
1806 }
1807
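/* stats_get handler: accumulate basic and per-queue statistics from every
 * slave into the caller-provided structure. */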
1808 static void
1809 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1810 {
1811         struct bond_dev_private *internals = dev->data->dev_private;
1812         struct rte_eth_stats slave_stats;
1813         int i, j;
1814
1815         for (i = 0; i < internals->slave_count; i++) {
1816                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1817
1818                 stats->ipackets += slave_stats.ipackets;
1819                 stats->opackets += slave_stats.opackets;
1820                 stats->ibytes += slave_stats.ibytes;
1821                 stats->obytes += slave_stats.obytes;
1822                 stats->imissed += slave_stats.imissed;
1823                 stats->ierrors += slave_stats.ierrors;
1824                 stats->oerrors += slave_stats.oerrors;
1825                 stats->imcasts += slave_stats.imcasts;
1826                 stats->rx_nombuf += slave_stats.rx_nombuf;
1827
1828                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1829                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1830                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1831                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1832                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1833                         stats->q_errors[j] += slave_stats.q_errors[j];
1834                 }
1835
1836         }
1837 }
1838
1839 static void
1840 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1841 {
1842         struct bond_dev_private *internals = dev->data->dev_private;
1843         int i;
1844
1845         for (i = 0; i < internals->slave_count; i++)
1846                 rte_eth_stats_reset(internals->slaves[i].port_id);
1847 }
1848
1849 static void
1850 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1851 {
1852         struct bond_dev_private *internals = eth_dev->data->dev_private;
1853         int i;
1854
1855         internals->promiscuous_en = 1;
1856
1857         switch (internals->mode) {
1858         /* Promiscuous mode is propagated to all slaves */
1859         case BONDING_MODE_ROUND_ROBIN:
1860         case BONDING_MODE_BALANCE:
1861         case BONDING_MODE_BROADCAST:
1862                 for (i = 0; i < internals->slave_count; i++)
1863                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1864                 break;
1865         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1866         case BONDING_MODE_8023AD:
1867                 break;
1868         /* Promiscuous mode is propagated only to primary slave */
1869         case BONDING_MODE_ACTIVE_BACKUP:
1870         case BONDING_MODE_TLB:
1871         case BONDING_MODE_ALB:
1872         default:
1873                 rte_eth_promiscuous_enable(internals->current_primary_port);
1874         }
1875 }
1876
1877 static void
1878 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1879 {
1880         struct bond_dev_private *internals = dev->data->dev_private;
1881         int i;
1882
1883         internals->promiscuous_en = 0;
1884
1885         switch (internals->mode) {
1886         /* Promiscuous mode is propagated to all slaves */
1887         case BONDING_MODE_ROUND_ROBIN:
1888         case BONDING_MODE_BALANCE:
1889         case BONDING_MODE_BROADCAST:
1890                 for (i = 0; i < internals->slave_count; i++)
1891                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1892                 break;
1893         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1894         case BONDING_MODE_8023AD:
1895                 break;
1896         /* Promiscuous mode is propagated only to primary slave */
1897         case BONDING_MODE_ACTIVE_BACKUP:
1898         case BONDING_MODE_TLB:
1899         case BONDING_MODE_ALB:
1900         default:
1901                 rte_eth_promiscuous_disable(internals->current_primary_port);
1902         }
1903 }
1904
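/* EAL alarm callback used to propagate an LSC event to the application
 * after the configured link up/down delay has elapsed. */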
1905 static void
1906 bond_ethdev_delayed_lsc_propagation(void *arg)
1907 {
1908         if (arg == NULL)
1909                 return;
1910
1911         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1912                         RTE_ETH_EVENT_INTR_LSC);
1913 }
1914
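/* LSC event handler for slave ports: activate or deactivate the slave,
 * elect a new primary and update the bonded link status as needed, then
 * propagate the event upwards, optionally delayed via an EAL alarm. */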
1915 void
1916 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1917                 void *param)
1918 {
1919         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1920         struct bond_dev_private *internals;
1921         struct rte_eth_link link;
1922
1923         int i, valid_slave = 0;
1924         uint8_t active_pos;
1925         uint8_t lsc_flag = 0;
1926
1927         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1928                 return;
1929
1930         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1931         slave_eth_dev = &rte_eth_devices[port_id];
1932
1933         if (check_for_bonded_ethdev(bonded_eth_dev))
1934                 return;
1935
1936         internals = bonded_eth_dev->data->dev_private;
1937
1938         /* If the device isn't started don't handle interrupts */
1939         if (!bonded_eth_dev->data->dev_started)
1940                 return;
1941
1942         /* verify that port_id is a valid slave of bonded port */
1943         for (i = 0; i < internals->slave_count; i++) {
1944                 if (internals->slaves[i].port_id == port_id) {
1945                         valid_slave = 1;
1946                         break;
1947                 }
1948         }
1949
1950         if (!valid_slave)
1951                 return;
1952
1953         /* Search for port in active port list */
1954         active_pos = find_slave_by_id(internals->active_slaves,
1955                         internals->active_slave_count, port_id);
1956
1957         rte_eth_link_get_nowait(port_id, &link);
1958         if (link.link_status) {
1959                 if (active_pos < internals->active_slave_count)
1960                         return;
1961
1962                 /* if no active slave ports then set this port to be primary port */
1963                 if (internals->active_slave_count < 1) {
1964                         /* If first active slave, then change link status */
1965                         bonded_eth_dev->data->dev_link.link_status = 1;
1966                         internals->current_primary_port = port_id;
1967                         lsc_flag = 1;
1968
1969                         mac_address_slaves_update(bonded_eth_dev);
1970
1971                         /* Inherit eth dev link properties from first active slave */
1972                         link_properties_set(bonded_eth_dev,
1973                                         &(slave_eth_dev->data->dev_link));
1974                 }
1975
1976                 activate_slave(bonded_eth_dev, port_id);
1977
1978                 /* If user has defined the primary port then default to using it */
1979                 if (internals->user_defined_primary_port &&
1980                                 internals->primary_port == port_id)
1981                         bond_ethdev_primary_set(internals, port_id);
1982         } else {
1983                 if (active_pos == internals->active_slave_count)
1984                         return;
1985
1986                 /* Remove from active slave list */
1987                 deactivate_slave(bonded_eth_dev, port_id);
1988
1989                 /* No active slaves, change link status to down and reset other
1990                  * link properties */
1991                 if (internals->active_slave_count < 1) {
1992                         lsc_flag = 1;
1993                         bonded_eth_dev->data->dev_link.link_status = 0;
1994
1995                         link_properties_reset(bonded_eth_dev);
1996                 }
1997
1998                 /* Update primary id, take first active slave from list or if none
1999                  * available fall back to the configured primary port */
2000                 if (port_id == internals->current_primary_port) {
2001                         if (internals->active_slave_count > 0)
2002                                 bond_ethdev_primary_set(internals,
2003                                                 internals->active_slaves[0]);
2004                         else
2005                                 internals->current_primary_port = internals->primary_port;
2006                 }
2007         }
2008
2009         if (lsc_flag) {
2010                 /* Cancel any possible outstanding interrupts if delays are enabled */
2011                 if (internals->link_up_delay_ms > 0 ||
2012                         internals->link_down_delay_ms > 0)
2013                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2014                                         bonded_eth_dev);
2015
2016                 if (bonded_eth_dev->data->dev_link.link_status) {
2017                         if (internals->link_up_delay_ms > 0)
2018                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2019                                                 bond_ethdev_delayed_lsc_propagation,
2020                                                 (void *)bonded_eth_dev);
2021                         else
2022                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2023                                                 RTE_ETH_EVENT_INTR_LSC);
2024
2025                 } else {
2026                         if (internals->link_down_delay_ms > 0)
2027                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2028                                                 bond_ethdev_delayed_lsc_propagation,
2029                                                 (void *)bonded_eth_dev);
2030                         else
2031                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2032                                                 RTE_ETH_EVENT_INTR_LSC);
2033                 }
2034         }
2035 }
2036
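/* reta_update handler: store the new RETA locally, replicating the
 * supplied groups to fill the whole table, then push the configuration to
 * every slave. */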
2037 static int
2038 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2039                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2040 {
2041         unsigned i, j;
2042         int result = 0;
2043         int slave_reta_size;
2044         unsigned reta_count;
2045         struct bond_dev_private *internals = dev->data->dev_private;
2046
2047         if (reta_size != internals->reta_size)
2048                 return -EINVAL;
2049
2050         /* Copy RETA table */
2051         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2052
2053         for (i = 0; i < reta_count; i++) {
2054                 internals->reta_conf[i].mask = reta_conf[i].mask;
2055                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2056                         if ((reta_conf[i].mask >> j) & 0x01)
2057                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2058         }
2059
2060         /* Fill rest of array */
2061         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2062                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2063                                 sizeof(internals->reta_conf[0]) * reta_count);
2064
2065         /* Propagate RETA over slaves */
2066         for (i = 0; i < internals->slave_count; i++) {
2067                 slave_reta_size = internals->slaves[i].reta_size;
2068                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2069                                 &internals->reta_conf[0], slave_reta_size);
2070                 if (result < 0)
2071                         return result;
2072         }
2073
2074         return 0;
2075 }
2076
2077 static int
2078 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2079                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2080 {
2081         int i, j;
2082         struct bond_dev_private *internals = dev->data->dev_private;
2083
2084         if (reta_size != internals->reta_size)
2085                 return -EINVAL;
2086
2087         /* Copy RETA table */
2088         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2089                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2090                         if ((reta_conf[i].mask >> j) & 0x01)
2091                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2092
2093         return 0;
2094 }
2095
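/* rss_hash_update handler: mask the requested hash functions against the
 * capabilities common to all slaves, cache the RSS key, then propagate
 * the new configuration to every slave. */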
2096 static int
2097 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2098                 struct rte_eth_rss_conf *rss_conf)
2099 {
2100         int i, result = 0;
2101         struct bond_dev_private *internals = dev->data->dev_private;
2102         struct rte_eth_rss_conf bond_rss_conf;
2103
2104         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2105
2106         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2107
2108         if (bond_rss_conf.rss_hf != 0)
2109                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2110
2111         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2112                         sizeof(internals->rss_key)) {
2113                 if (bond_rss_conf.rss_key_len == 0)
2114                         bond_rss_conf.rss_key_len = 40; /* default RSS key length in bytes */
2115                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2116                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2117                                 internals->rss_key_len);
2118         }
2119
2120         for (i = 0; i < internals->slave_count; i++) {
2121                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2122                                 &bond_rss_conf);
2123                 if (result < 0)
2124                         return result;
2125         }
2126
2127         return 0;
2128 }
2129
2130 static int
2131 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2132                 struct rte_eth_rss_conf *rss_conf)
2133 {
2134         struct bond_dev_private *internals = dev->data->dev_private;
2135
2136         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2137         rss_conf->rss_key_len = internals->rss_key_len;
2138         if (rss_conf->rss_key)
2139                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2140
2141         return 0;
2142 }
2143
2144 struct eth_dev_ops default_dev_ops = {
2145                 .dev_start            = bond_ethdev_start,
2146                 .dev_stop             = bond_ethdev_stop,
2147                 .dev_close            = bond_ethdev_close,
2148                 .dev_configure        = bond_ethdev_configure,
2149                 .dev_infos_get        = bond_ethdev_info,
2150                 .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2151                 .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2152                 .rx_queue_release     = bond_ethdev_rx_queue_release,
2153                 .tx_queue_release     = bond_ethdev_tx_queue_release,
2154                 .link_update          = bond_ethdev_link_update,
2155                 .stats_get            = bond_ethdev_stats_get,
2156                 .stats_reset          = bond_ethdev_stats_reset,
2157                 .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2158                 .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2159                 .reta_update          = bond_ethdev_rss_reta_update,
2160                 .reta_query           = bond_ethdev_rss_reta_query,
2161                 .rss_hash_update      = bond_ethdev_rss_hash_update,
2162                 .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2163 };
2164
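/*
 * Driver init: parse the bonded device's kvargs and create the device.
 * For example (kvarg names assumed from the PMD_BOND_*_KVARG definitions,
 * illustrative only):
 *
 *   --vdev 'eth_bond0,mode=1,slave=0000:04:00.0,socket_id=0'
 *
 * Only the mode and socket id are handled here; slave, primary and policy
 * arguments are deferred to bond_ethdev_configure() because slave devices
 * may not have been probed yet.
 */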
2165 static int
2166 bond_init(const char *name, const char *params)
2167 {
2168         struct bond_dev_private *internals;
2169         struct rte_kvargs *kvlist;
2170         uint8_t bonding_mode, socket_id;
2171         int  arg_count, port_id;
2172
2173         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2174
2175         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2176         if (kvlist == NULL)
2177                 return -1;
2178
2179         /* Parse link bonding mode */
2180         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2181                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2182                                 &bond_ethdev_parse_slave_mode_kvarg,
2183                                 &bonding_mode) != 0) {
2184                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2185                                         name);
2186                         goto parse_error;
2187                 }
2188         } else {
2189                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2190                                 "device %s\n", name);
2191                 goto parse_error;
2192         }
2193
2194         /* Parse socket id to create bonding device on */
2195         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2196         if (arg_count == 1) {
2197                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2198                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2199                                 != 0) {
2200                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2201                                         "bonded device %s\n", name);
2202                         goto parse_error;
2203                 }
2204         } else if (arg_count > 1) {
2205                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2206                                 "bonded device %s\n", name);
2207                 goto parse_error;
2208         } else {
2209                 socket_id = rte_socket_id();
2210         }
2211
2212         /* Create link bonding eth device */
2213         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2214         if (port_id < 0) {
2215                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2216                                 "socket %u.\n", name, bonding_mode, socket_id);
2217                 goto parse_error;
2218         }
2219         internals = rte_eth_devices[port_id].data->dev_private;
2220         internals->kvlist = kvlist;
2221
2222         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2223                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2224         return 0;
2225
2226 parse_error:
2227         rte_kvargs_free(kvlist);
2228
2229         return -1;
2230 }
2231
2232 static int
2233 bond_uninit(const char *name)
2234 {
2235         int  ret;
2236
2237         if (name == NULL)
2238                 return -EINVAL;
2239
2240         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2241
2242         /* free link bonding eth device */
2243         ret = rte_eth_bond_free(name);
2244         if (ret < 0)
2245                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2246
2247         return ret;
2248 }
2249
2250 /* This function resolves the slave port ids after all the other pdevs and
2251  * vdevs have been allocated */
2252 static int
2253 bond_ethdev_configure(struct rte_eth_dev *dev)
2254 {
2255         char *name = dev->data->name;
2256         struct bond_dev_private *internals = dev->data->dev_private;
2257         struct rte_kvargs *kvlist = internals->kvlist;
2258         int arg_count;
2259         uint8_t port_id = dev - rte_eth_devices;
2260
2261         static const uint8_t default_rss_key[40] = {
2262                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2263                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2264                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2265                 0xBE, 0xAC, 0x01, 0xFA
2266         };
2267
2268         unsigned i, j;
2269
2270         /* If RSS is enabled, fill table and key with default values */
2271         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2272                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2273                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2274                 memcpy(internals->rss_key, default_rss_key, sizeof(default_rss_key));
2275
2276                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2277                         internals->reta_conf[i].mask = ~0LL;
2278                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2279                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2280                 }
2281         }
2282
2283         /*
2284          * if no kvlist, it means that this bonded device has been created
2285          * through the bonding api.
2286          */
2287         if (!kvlist)
2288                 return 0;
2289
2290         /* Parse MAC address for bonded device */
2291         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2292         if (arg_count == 1) {
2293                 struct ether_addr bond_mac;
2294
2295                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2296                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2297                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2298                                         name);
2299                         return -1;
2300                 }
2301
2302                 /* Set MAC address */
2303                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2304                         RTE_LOG(ERR, EAL,
2305                                         "Failed to set mac address on bonded device %s\n",
2306                                         name);
2307                         return -1;
2308                 }
2309         } else if (arg_count > 1) {
2310                 RTE_LOG(ERR, EAL,
2311                                 "MAC address can be specified only once for bonded device %s\n",
2312                                 name);
2313                 return -1;
2314         }
2315
2316         /* Parse/set balance mode transmit policy */
2317         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2318         if (arg_count == 1) {
2319                 uint8_t xmit_policy;
2320
2321                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2322                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2323                                                 0) {
2324                         RTE_LOG(INFO, EAL,
2325                                         "Invalid xmit policy specified for bonded device %s\n",
2326                                         name);
2327                         return -1;
2328                 }
2329
2330                 /* Set balance mode transmit policy */
2331                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2332                         RTE_LOG(ERR, EAL,
2333                                         "Failed to set balance xmit policy on bonded device %s\n",
2334                                         name);
2335                         return -1;
2336                 }
2337         } else if (arg_count > 1) {
2338                 RTE_LOG(ERR, EAL,
2339                                 "Transmit policy can be specified only once for bonded device"
2340                                 " %s\n", name);
2341                 return -1;
2342         }
2343
2344         /* Parse/add slave ports to bonded device */
2345         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2346                 struct bond_ethdev_slave_ports slave_ports;
2347                 unsigned i;
2348
2349                 memset(&slave_ports, 0, sizeof(slave_ports));
2350
2351                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2352                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2353                         RTE_LOG(ERR, EAL,
2354                                         "Failed to parse slave ports for bonded device %s\n",
2355                                         name);
2356                         return -1;
2357                 }
2358
2359                 for (i = 0; i < slave_ports.slave_count; i++) {
2360                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2361                                 RTE_LOG(ERR, EAL,
2362                                                 "Failed to add port %d as slave to bonded device %s\n",
2363                                                 slave_ports.slaves[i], name);
2364                         }
2365                 }
2366
2367         } else {
2368                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2369                 return -1;
2370         }
2371
2372         /* Parse/set primary slave port id*/
2373         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2374         if (arg_count == 1) {
2375                 uint8_t primary_slave_port_id;
2376
2377                 if (rte_kvargs_process(kvlist,
2378                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2379                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2380                                 &primary_slave_port_id) < 0) {
2381                         RTE_LOG(INFO, EAL,
2382                                         "Invalid primary slave port id specified for bonded device"
2383                                         " %s\n", name);
2384                         return -1;
2385                 }
2386
2387                 /* Set primary slave port id */
2388                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2389                                 != 0) {
2390                         RTE_LOG(ERR, EAL,
2391                                         "Failed to set primary slave port %d on bonded device %s\n",
2392                                         primary_slave_port_id, name);
2393                         return -1;
2394                 }
2395         } else if (arg_count > 1) {
2396                 RTE_LOG(INFO, EAL,
2397                                 "Primary slave can be specified only once for bonded device"
2398                                 " %s\n", name);
2399                 return -1;
2400         }
2401
2402         /* Parse link status monitor polling interval */
2403         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2404         if (arg_count == 1) {
2405                 uint32_t lsc_poll_interval_ms;
2406
2407                 if (rte_kvargs_process(kvlist,
2408                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2409                                 &bond_ethdev_parse_time_ms_kvarg,
2410                                 &lsc_poll_interval_ms) < 0) {
2411                         RTE_LOG(INFO, EAL,
2412                                         "Invalid lsc polling interval value specified for bonded"
2413                                         " device %s\n", name);
2414                         return -1;
2415                 }
2416
2417                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2418                                 != 0) {
2419                         RTE_LOG(ERR, EAL,
2420                                         "Failed to set lsc monitor polling interval (%u ms) on"
2421                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2422                         return -1;
2423                 }
2424         } else if (arg_count > 1) {
2425                 RTE_LOG(INFO, EAL,
2426                                 "LSC polling interval can be specified only once for bonded"
2427                                 " device %s\n", name);
2428                 return -1;
2429         }
2430
2431         /* Parse link up interrupt propagation delay */
2432         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2433         if (arg_count == 1) {
2434                 uint32_t link_up_delay_ms;
2435
2436                 if (rte_kvargs_process(kvlist,
2437                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2438                                 &bond_ethdev_parse_time_ms_kvarg,
2439                                 &link_up_delay_ms) < 0) {
2440                         RTE_LOG(INFO, EAL,
2441                                         "Invalid link up propagation delay value specified for"
2442                                         " bonded device %s\n", name);
2443                         return -1;
2444                 }
2445
2446                 /* Set link up propagation delay */
2447                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2448                                 != 0) {
2449                         RTE_LOG(ERR, EAL,
2450                                         "Failed to set link up propagation delay (%u ms) on bonded"
2451                                         " device %s\n", link_up_delay_ms, name);
2452                         return -1;
2453                 }
2454         } else if (arg_count > 1) {
2455                 RTE_LOG(INFO, EAL,
2456                                 "Link up propagation delay can be specified only once for"
2457                                 " bonded device %s\n", name);
2458                 return -1;
2459         }
2460
2461         /* Parse link down interrupt propagation delay */
2462         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2463         if (arg_count == 1) {
2464                 uint32_t link_down_delay_ms;
2465
2466                 if (rte_kvargs_process(kvlist,
2467                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2468                                 &bond_ethdev_parse_time_ms_kvarg,
2469                                 &link_down_delay_ms) < 0) {
2470                         RTE_LOG(INFO, EAL,
2471                                         "Invalid link down propagation delay value specified for"
2472                                         " bonded device %s\n", name);
2473                         return -1;
2474                 }
2475
2476                 /* Set link down propagation delay */
2477                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2478                                 != 0) {
2479                         RTE_LOG(ERR, EAL,
2480                                         "Failed to set link down propagation delay (%u ms) on"
2481                                         " bonded device %s\n", link_down_delay_ms, name);
2482                         return -1;
2483                 }
2484         } else if (arg_count > 1) {
2485                 RTE_LOG(INFO, EAL,
2486                                 "Link down propagation delay can be specified only once for"
2487                                 " bonded device %s\n", name);
2488                 return -1;
2489         }
2490
2491         return 0;
2492 }
2493
2494 static struct rte_driver bond_drv = {
2495         .name = "eth_bond",
2496         .type = PMD_VDEV,
2497         .init = bond_init,
2498         .uninit = bond_uninit,
2499 };
2500
2501 PMD_REGISTER_DRIVER(bond_drv);