340d79312fdad621ab8098d84a7d5440d7dd059e
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 #include <rte_ip.h>
42 #include <rte_ip_frag.h>
43 #include <rte_devargs.h>
44 #include <rte_kvargs.h>
45 #include <rte_vdev.h>
46 #include <rte_alarm.h>
47 #include <rte_cycles.h>
48
49 #include "rte_eth_bond.h"
50 #include "rte_eth_bond_private.h"
51 #include "rte_eth_bond_8023ad_private.h"
52
53 #define REORDER_PERIOD_MS 10
54
55 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
56
57 /* Table for statistics in mode 5 TLB */
58 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
59
60 static inline size_t
61 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
62 {
63         size_t vlan_offset = 0;
64
65         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
66                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
67
68                 vlan_offset = sizeof(struct vlan_hdr);
69                 *proto = vlan_hdr->eth_proto;
70
71                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
72                         vlan_hdr = vlan_hdr + 1;
73                         *proto = vlan_hdr->eth_proto;
74                         vlan_offset += sizeof(struct vlan_hdr);
75                 }
76         }
77         return vlan_offset;
78 }
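
/*
 * Illustrative sketch (not part of the driver): locating the IPv4 header
 * behind optional VLAN tags with get_vlan_offset(). The helper name
 * example_ipv4_hdr_locate is hypothetical; it assumes the mbuf holds a
 * contiguous Ethernet frame with at most two stacked 802.1Q tags, as
 * handled above.
 */
static inline struct ipv4_hdr *
example_ipv4_hdr_locate(struct rte_mbuf *m)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t offset = get_vlan_offset(eth_hdr, &proto);

	if (proto != rte_cpu_to_be_16(ETHER_TYPE_IPv4))
		return NULL; /* not IPv4 after the (optional) VLAN tags */

	return (struct ipv4_hdr *)((char *)(eth_hdr + 1) + offset);
}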
79
80 static uint16_t
81 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
82 {
83         struct bond_dev_private *internals;
84
85         uint16_t num_rx_slave = 0;
86         uint16_t num_rx_total = 0;
87
88         int i;
89
90         /* Cast to structure containing the bonded device's port id and queue id */
91         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92
93         internals = bd_rx_q->dev_private;
94
95
96         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
97                 /* Offset of pointer to *bufs increases as packets are received
98                  * from other slaves */
99                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
100                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
101                 if (num_rx_slave) {
102                         num_rx_total += num_rx_slave;
103                         nb_pkts -= num_rx_slave;
104                 }
105         }
106
107         return num_rx_total;
108 }
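
/*
 * Hypothetical application-side sketch: the bonded device is polled like
 * any other ethdev, and the burst function above fans the request out over
 * the active slaves. Queue 0 and the helper name are assumptions made for
 * illustration only.
 */
static inline uint16_t
example_poll_bonded_port(uint8_t bonded_port_id, struct rte_mbuf **pkts,
		uint16_t nb_pkts)
{
	return rte_eth_rx_burst(bonded_port_id, 0, pkts, nb_pkts);
}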
109
110 static uint16_t
111 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
112                 uint16_t nb_pkts)
113 {
114         struct bond_dev_private *internals;
115
116         /* Cast to structure containing the bonded device's port id and queue id */
117         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
118
119         internals = bd_rx_q->dev_private;
120
121         return rte_eth_rx_burst(internals->current_primary_port,
122                         bd_rx_q->queue_id, bufs, nb_pkts);
123 }
124
125 static inline uint8_t
126 is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
127 {
128         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
129
130         return !vlan_tci && (ethertype == ether_type_slow_be &&
131                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
132 }
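
/*
 * Illustrative sketch (assumed helper, mirroring the RX path below):
 * classifying an mbuf as slow-protocol traffic with is_lacp_packets().
 */
static inline int
example_is_slow_frame(struct rte_mbuf *m)
{
	struct ether_hdr *hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
	uint8_t subtype =
		((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

	return is_lacp_packets(hdr->ether_type, subtype, m->vlan_tci);
}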
133
134 static uint16_t
135 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
136                 uint16_t nb_pkts)
137 {
138         /* Cast to structure containing the bonded device's port id and queue id */
139         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
140         struct bond_dev_private *internals = bd_rx_q->dev_private;
141         struct ether_addr bond_mac;
142
143         struct ether_hdr *hdr;
144
145         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
146         uint16_t num_rx_total = 0;      /* Total number of received packets */
147         uint8_t slaves[RTE_MAX_ETHPORTS];
148         uint8_t slave_count, idx;
149
150         uint8_t collecting;  /* current slave collecting status */
151         const uint8_t promisc = internals->promiscuous_en;
152         uint8_t i, j, k;
153         uint8_t subtype;
154
155         rte_eth_macaddr_get(internals->port_id, &bond_mac);
156         /* Copy slave list to protect against slave up/down changes during rx
157          * bursting */
158         slave_count = internals->active_slave_count;
159         memcpy(slaves, internals->active_slaves,
160                         sizeof(internals->active_slaves[0]) * slave_count);
161
162         idx = internals->active_slave;
163         if (idx >= slave_count) {
164                 internals->active_slave = 0;
165                 idx = 0;
166         }
167         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
168                 j = num_rx_total;
169                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
170                                          COLLECTING);
171
172                 /* Read packets from this slave */
173                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
174                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
175
176                 for (k = j; k < 2 && k < num_rx_total; k++)
177                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
178
179                 /* Handle slow protocol packets. */
180                 while (j < num_rx_total) {
181                         if (j + 3 < num_rx_total)
182                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
183
184                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
185                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
186
187                         /* Remove the packet from the array if it is a slow packet, the
188                          * slave is not in collecting state, or the bonding interface is not
189                          * in promiscuous mode and the packet address does not match. */
190                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
191                                 !collecting || (!promisc &&
192                                         !is_multicast_ether_addr(&hdr->d_addr) &&
193                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
194
195                                 if (hdr->ether_type == ether_type_slow_be) {
196                                         bond_mode_8023ad_handle_slow_pkt(
197                                             internals, slaves[idx], bufs[j]);
198                                 } else
199                                         rte_pktmbuf_free(bufs[j]);
200
201                                 /* Packet is handled by mode 4 or dropped; shift the array */
202                                 num_rx_total--;
203                                 if (j < num_rx_total) {
204                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
205                                                 (num_rx_total - j));
206                                 }
207                         } else
208                                 j++;
209                 }
210                 if (unlikely(++idx == slave_count))
211                         idx = 0;
212         }
213
214         internals->active_slave = idx;
215         return num_rx_total;
216 }
217
218 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
219 uint32_t burstnumberRX;
220 uint32_t burstnumberTX;
221
222 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
223
224 static void
225 arp_op_name(uint16_t arp_op, char *buf)
226 {
227         switch (arp_op) {
228         case ARP_OP_REQUEST:
229                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
230                 return;
231         case ARP_OP_REPLY:
232                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
233                 return;
234         case ARP_OP_REVREQUEST:
235                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
236                                 "Reverse ARP Request");
237                 return;
238         case ARP_OP_REVREPLY:
239                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
240                                 "Reverse ARP Reply");
241                 return;
242         case ARP_OP_INVREQUEST:
243                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
244                                 "Peer Identify Request");
245                 return;
246         case ARP_OP_INVREPLY:
247                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
248                                 "Peer Identify Reply");
249                 return;
250         default:
251                 break;
252         }
253         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
254         return;
255 }
256 #endif
257 #define MaxIPv4String   16
258 static void
259 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
260 {
261         uint32_t ipv4_addr;
262
263         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
264         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
265                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
266                 ipv4_addr & 0xFF);
267 }
268
269 #define MAX_CLIENTS_NUMBER      128
270 uint8_t active_clients;
271 struct client_stats_t {
272         uint8_t port;
273         uint32_t ipv4_addr;
274         uint32_t ipv4_rx_packets;
275         uint32_t ipv4_tx_packets;
276 };
277 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
278
279 static void
280 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
281 {
282         int i = 0;
283
284         for (; i < MAX_CLIENTS_NUMBER; i++)     {
285                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
286                         /* Existing client: update its RX or TX packet count */
287                         if (TXorRXindicator == &burstnumberRX)
288                                 client_stats[i].ipv4_rx_packets++;
289                         else
290                                 client_stats[i].ipv4_tx_packets++;
291                         return;
292                 }
293         }
294         /* We have a new client. Insert it into the table and update its stats */
295         if (TXorRXindicator == &burstnumberRX)
296                 client_stats[active_clients].ipv4_rx_packets++;
297         else
298                 client_stats[active_clients].ipv4_tx_packets++;
299         client_stats[active_clients].ipv4_addr = addr;
300         client_stats[active_clients].port = port;
301         active_clients++;
302
303 }
304
305 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
306 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
307                 RTE_LOG(DEBUG, PMD, \
308                 "%s " \
309                 "port:%d " \
310                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
311                 "SrcIP:%s " \
312                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
313                 "DstIP:%s " \
314                 "%s " \
315                 "%d\n", \
316                 info, \
317                 port, \
318                 eth_h->s_addr.addr_bytes[0], \
319                 eth_h->s_addr.addr_bytes[1], \
320                 eth_h->s_addr.addr_bytes[2], \
321                 eth_h->s_addr.addr_bytes[3], \
322                 eth_h->s_addr.addr_bytes[4], \
323                 eth_h->s_addr.addr_bytes[5], \
324                 src_ip, \
325                 eth_h->d_addr.addr_bytes[0], \
326                 eth_h->d_addr.addr_bytes[1], \
327                 eth_h->d_addr.addr_bytes[2], \
328                 eth_h->d_addr.addr_bytes[3], \
329                 eth_h->d_addr.addr_bytes[4], \
330                 eth_h->d_addr.addr_bytes[5], \
331                 dst_ip, \
332                 arp_op, \
333                 ++burstnumber)
334 #endif
335
336 static void
337 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
338                 uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
339 {
340         struct ipv4_hdr *ipv4_h;
341 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
342         struct arp_hdr *arp_h;
343         char dst_ip[16];
344         char ArpOp[24];
345         char buf[16];
346 #endif
347         char src_ip[16];
348
349         uint16_t ether_type = eth_h->ether_type;
350         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
351
352 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
353         snprintf(buf, 16, "%s", info);
354 #endif
355
356         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
357                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
358                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
359 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
360                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
361                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
362 #endif
363                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
364         }
365 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
366         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
367                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
368                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
369                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
370                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
371                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
372         }
373 #endif
374 }
375 #endif
376
377 static uint16_t
378 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
379 {
380         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
381         struct bond_dev_private *internals = bd_tx_q->dev_private;
382         struct ether_hdr *eth_h;
383         uint16_t ether_type, offset;
384         uint16_t nb_recv_pkts;
385         int i;
386
387         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
388
389         for (i = 0; i < nb_recv_pkts; i++) {
390                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
391                 ether_type = eth_h->ether_type;
392                 offset = get_vlan_offset(eth_h, &ether_type);
393
394                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
395 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
396                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
397 #endif
398                         bond_mode_alb_arp_recv(eth_h, offset, internals);
399                 }
400 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
401                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
402                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
403 #endif
404         }
405
406         return nb_recv_pkts;
407 }
408
409 static uint16_t
410 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
411                 uint16_t nb_pkts)
412 {
413         struct bond_dev_private *internals;
414         struct bond_tx_queue *bd_tx_q;
415
416         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
417         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
418
419         uint8_t num_of_slaves;
420         uint8_t slaves[RTE_MAX_ETHPORTS];
421
422         uint16_t num_tx_total = 0, num_tx_slave;
423
424         static int slave_idx = 0;
425         int i, cslave_idx = 0, tx_fail_total = 0;
426
427         bd_tx_q = (struct bond_tx_queue *)queue;
428         internals = bd_tx_q->dev_private;
429
430         /* Copy slave list to protect against slave up/down changes during tx
431          * bursting */
432         num_of_slaves = internals->active_slave_count;
433         memcpy(slaves, internals->active_slaves,
434                         sizeof(internals->active_slaves[0]) * num_of_slaves);
435
436         if (num_of_slaves < 1)
437                 return num_tx_total;
438
439         /* Populate each slave's mbuf array with the packets to be sent on it */
440         for (i = 0; i < nb_pkts; i++) {
441                 cslave_idx = (slave_idx + i) % num_of_slaves;
442                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
443         }
444
445         /* increment current slave index so the next call to tx burst starts on the
446          * next slave */
447         slave_idx = ++cslave_idx;
448
449         /* Send packet burst on each slave device */
450         for (i = 0; i < num_of_slaves; i++) {
451                 if (slave_nb_pkts[i] > 0) {
452                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
453                                         slave_bufs[i], slave_nb_pkts[i]);
454
455                         /* if tx burst fails move packets to end of bufs */
456                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
457                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
458
459                                 tx_fail_total += tx_fail_slave;
460
461                                 memcpy(&bufs[nb_pkts - tx_fail_total],
462                                                 &slave_bufs[i][num_tx_slave],
463                                                 tx_fail_slave * sizeof(bufs[0]));
464                         }
465                         num_tx_total += num_tx_slave;
466                 }
467         }
468
469         return num_tx_total;
470 }
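
/*
 * Hypothetical caller-side sketch: packets the burst above could not send
 * are compacted to the tail of bufs, so an application may retry or free
 * everything past the returned count. Names are illustrative only.
 */
static inline void
example_tx_burst_and_free(uint8_t bonded_port_id, uint16_t queue_id,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	uint16_t sent = rte_eth_tx_burst(bonded_port_id, queue_id, bufs, nb_pkts);

	/* Unsent packets occupy bufs[sent..nb_pkts - 1]; free them here. */
	while (sent < nb_pkts)
		rte_pktmbuf_free(bufs[sent++]);
}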
471
472 static uint16_t
473 bond_ethdev_tx_burst_active_backup(void *queue,
474                 struct rte_mbuf **bufs, uint16_t nb_pkts)
475 {
476         struct bond_dev_private *internals;
477         struct bond_tx_queue *bd_tx_q;
478
479         bd_tx_q = (struct bond_tx_queue *)queue;
480         internals = bd_tx_q->dev_private;
481
482         if (internals->active_slave_count < 1)
483                 return 0;
484
485         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
486                         bufs, nb_pkts);
487 }
488
489 static inline uint16_t
490 ether_hash(struct ether_hdr *eth_hdr)
491 {
492         unaligned_uint16_t *word_src_addr =
493                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
494         unaligned_uint16_t *word_dst_addr =
495                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
496
497         return (word_src_addr[0] ^ word_dst_addr[0]) ^
498                         (word_src_addr[1] ^ word_dst_addr[1]) ^
499                         (word_src_addr[2] ^ word_dst_addr[2]);
500 }
501
502 static inline uint32_t
503 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
504 {
505         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
506 }
507
508 static inline uint32_t
509 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
510 {
511         unaligned_uint32_t *word_src_addr =
512                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
513         unaligned_uint32_t *word_dst_addr =
514                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
515
516         return (word_src_addr[0] ^ word_dst_addr[0]) ^
517                         (word_src_addr[1] ^ word_dst_addr[1]) ^
518                         (word_src_addr[2] ^ word_dst_addr[2]) ^
519                         (word_src_addr[3] ^ word_dst_addr[3]);
520 }
521
522 uint16_t
523 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
524 {
525         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
526
527         uint32_t hash = ether_hash(eth_hdr);
528
529         return (hash ^= hash >> 8) % slave_count;
530 }
531
532 uint16_t
533 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
534 {
535         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
536         uint16_t proto = eth_hdr->ether_type;
537         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
538         uint32_t hash, l3hash = 0;
539
540         hash = ether_hash(eth_hdr);
541
542         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
543                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
544                                 ((char *)(eth_hdr + 1) + vlan_offset);
545                 l3hash = ipv4_hash(ipv4_hdr);
546
547         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
548                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
549                                 ((char *)(eth_hdr + 1) + vlan_offset);
550                 l3hash = ipv6_hash(ipv6_hdr);
551         }
552
553         hash = hash ^ l3hash;
554         hash ^= hash >> 16;
555         hash ^= hash >> 8;
556
557         return hash % slave_count;
558 }
559
560 uint16_t
561 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
562 {
563         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
564         uint16_t proto = eth_hdr->ether_type;
565         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
566
567         struct udp_hdr *udp_hdr = NULL;
568         struct tcp_hdr *tcp_hdr = NULL;
569         uint32_t hash, l3hash = 0, l4hash = 0;
570
571         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
572                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
573                                 ((char *)(eth_hdr + 1) + vlan_offset);
574                 size_t ip_hdr_offset;
575
576                 l3hash = ipv4_hash(ipv4_hdr);
577
578                 /* There is no L4 header in a fragmented packet */
579                 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
580                         ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
581                                         IPV4_IHL_MULTIPLIER;
582
583                         if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
584                                 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
585                                                 ip_hdr_offset);
586                                 l4hash = HASH_L4_PORTS(tcp_hdr);
587                         } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
588                                 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
589                                                 ip_hdr_offset);
590                                 l4hash = HASH_L4_PORTS(udp_hdr);
591                         }
592                 }
593         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
594                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
595                                 ((char *)(eth_hdr + 1) + vlan_offset);
596                 l3hash = ipv6_hash(ipv6_hdr);
597
598                 if (ipv6_hdr->proto == IPPROTO_TCP) {
599                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
600                         l4hash = HASH_L4_PORTS(tcp_hdr);
601                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
602                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
603                         l4hash = HASH_L4_PORTS(udp_hdr);
604                 }
605         }
606
607         hash = l3hash ^ l4hash;
608         hash ^= hash >> 16;
609         hash ^= hash >> 8;
610
611         return hash % slave_count;
612 }
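
/*
 * Illustrative sketch: how the transmit path consumes the hash functions
 * above. internals->xmit_hash points at xmit_l2_hash, xmit_l23_hash or
 * xmit_l34_hash depending on the configured balance policy; the wrapper
 * name here is an assumption for demonstration.
 */
static inline uint16_t
example_select_slave(struct bond_dev_private *internals,
		const struct rte_mbuf *buf, uint8_t slave_count)
{
	/* Returns an index in [0, slave_count); the same flow maps to the
	 * same slave as long as the active slave count is unchanged. */
	return internals->xmit_hash(buf, slave_count);
}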
613
614 struct bwg_slave {
615         uint64_t bwg_left_int;
616         uint64_t bwg_left_remainder;
617         uint8_t slave;
618 };
619
620 void
621 bond_tlb_activate_slave(struct bond_dev_private *internals) {
622         int i;
623
624         for (i = 0; i < internals->active_slave_count; i++) {
625                 tlb_last_obytets[internals->active_slaves[i]] = 0;
626         }
627 }
628
629 static int
630 bandwidth_cmp(const void *a, const void *b)
631 {
632         const struct bwg_slave *bwg_a = a;
633         const struct bwg_slave *bwg_b = b;
634         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
635         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
636                         (int64_t)bwg_a->bwg_left_remainder;
637         if (diff > 0)
638                 return 1;
639         else if (diff < 0)
640                 return -1;
641         else if (diff2 > 0)
642                 return 1;
643         else if (diff2 < 0)
644                 return -1;
645         else
646                 return 0;
647 }
648
649 static void
650 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
651                 struct bwg_slave *bwg_slave)
652 {
653         struct rte_eth_link link_status;
654
655         rte_eth_link_get(port_id, &link_status);
656         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
657         if (link_bwg == 0)
658                 return;
659         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
660         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
661         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
662 }
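
/*
 * Worked example (illustrative): for a 1 Gb/s slave link_speed is 1000, so
 * link_bwg starts as 1000 * 1000000 / 8 = 125000000 bytes per second before
 * being scaled by (update_idx + 1) * REORDER_PERIOD_MS. The unused share is
 * kept as an integer quotient plus remainder so that bandwidth_cmp() can
 * order slaves without floating point.
 */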
663
664 static void
665 bond_ethdev_update_tlb_slave_cb(void *arg)
666 {
667         struct bond_dev_private *internals = arg;
668         struct rte_eth_stats slave_stats;
669         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
670         uint8_t slave_count;
671         uint64_t tx_bytes;
672
673         uint8_t update_stats = 0;
674         uint8_t i, slave_id;
675
676         internals->slave_update_idx++;
677
678
679         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
680                 update_stats = 1;
681
682         for (i = 0; i < internals->active_slave_count; i++) {
683                 slave_id = internals->active_slaves[i];
684                 rte_eth_stats_get(slave_id, &slave_stats);
685                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
686                 bandwidth_left(slave_id, tx_bytes,
687                                 internals->slave_update_idx, &bwg_array[i]);
688                 bwg_array[i].slave = slave_id;
689
690                 if (update_stats) {
691                         tlb_last_obytets[slave_id] = slave_stats.obytes;
692                 }
693         }
694
695         if (update_stats == 1)
696                 internals->slave_update_idx = 0;
697
698         slave_count = i;
699         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
700         for (i = 0; i < slave_count; i++)
701                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
702
703         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
704                         (struct bond_dev_private *)internals);
705 }
706
707 static uint16_t
708 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
709 {
710         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
711         struct bond_dev_private *internals = bd_tx_q->dev_private;
712
713         struct rte_eth_dev *primary_port =
714                         &rte_eth_devices[internals->primary_port];
715         uint16_t num_tx_total = 0;
716         uint8_t i, j;
717
718         uint8_t num_of_slaves = internals->active_slave_count;
719         uint8_t slaves[RTE_MAX_ETHPORTS];
720
721         struct ether_hdr *ether_hdr;
722         struct ether_addr primary_slave_addr;
723         struct ether_addr active_slave_addr;
724
725         if (num_of_slaves < 1)
726                 return num_tx_total;
727
728         memcpy(slaves, internals->tlb_slaves_order,
729                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
730
731
732         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
733
734         if (nb_pkts > 3) {
735                 for (i = 0; i < 3; i++)
736                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
737         }
738
739         for (i = 0; i < num_of_slaves; i++) {
740                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
741                 for (j = num_tx_total; j < nb_pkts; j++) {
742                         if (j + 3 < nb_pkts)
743                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
744
745                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
746                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
747                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
748 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
749                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
750 #endif
751                 }
752
753                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
754                                 bufs + num_tx_total, nb_pkts - num_tx_total);
755
756                 if (num_tx_total == nb_pkts)
757                         break;
758         }
759
760         return num_tx_total;
761 }
762
763 void
764 bond_tlb_disable(struct bond_dev_private *internals)
765 {
766         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
767 }
768
769 void
770 bond_tlb_enable(struct bond_dev_private *internals)
771 {
772         bond_ethdev_update_tlb_slave_cb(internals);
773 }
774
775 static uint16_t
776 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
777 {
778         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
779         struct bond_dev_private *internals = bd_tx_q->dev_private;
780
781         struct ether_hdr *eth_h;
782         uint16_t ether_type, offset;
783
784         struct client_data *client_info;
785
786         /*
787          * We create transmit buffers for every slave plus one extra for packets
788          * sent via the TLB policy. In the worst case every packet is sent on one port.
789          */
790         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
791         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
792
793         /*
794          * We create separate transmit buffers for update packets as they won't be
795          * counted in num_tx_total.
796          */
797         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
798         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
799
800         struct rte_mbuf *upd_pkt;
801         size_t pkt_size;
802
803         uint16_t num_send, num_not_send = 0;
804         uint16_t num_tx_total = 0;
805         uint8_t slave_idx;
806
807         int i, j;
808
809         /* Search the tx buffer for ARP packets and hand them to the ALB logic */
810         for (i = 0; i < nb_pkts; i++) {
811                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
812                 ether_type = eth_h->ether_type;
813                 offset = get_vlan_offset(eth_h, &ether_type);
814
815                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
816                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
817
818                         /* Change src mac in eth header */
819                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
820
821                         /* Add packet to slave tx buffer */
822                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
823                         slave_bufs_pkts[slave_idx]++;
824                 } else {
825                         /* If packet is not ARP, send it with TLB policy */
826                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
827                                         bufs[i];
828                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
829                 }
830         }
831
832         /* Update connected client ARP tables */
833         if (internals->mode6.ntt) {
834                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
835                         client_info = &internals->mode6.client_table[i];
836
837                         if (client_info->in_use) {
838                                 /* Allocate new packet to send ARP update on current slave */
839                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
840                                 if (upd_pkt == NULL) {
841                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
842                                         continue;
843                                 }
844                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
845                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
846                                 upd_pkt->data_len = pkt_size;
847                                 upd_pkt->pkt_len = pkt_size;
848
849                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
850                                                 internals);
851
852                                 /* Add packet to update tx buffer */
853                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
854                                 update_bufs_pkts[slave_idx]++;
855                         }
856                 }
857                 internals->mode6.ntt = 0;
858         }
859
860         /* Send ARP packets on proper slaves */
861         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
862                 if (slave_bufs_pkts[i] > 0) {
863                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
864                                         slave_bufs[i], slave_bufs_pkts[i]);
865                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
866                                 bufs[nb_pkts - 1 - num_not_send - j] =
867                                                 slave_bufs[i][nb_pkts - 1 - j];
868                         }
869
870                         num_tx_total += num_send;
871                         num_not_send += slave_bufs_pkts[i] - num_send;
872
873 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
874                         /* Print TX stats including update packets */
875                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
876                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
877                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
878                         }
879 #endif
880                 }
881         }
882
883         /* Send update packets on proper slaves */
884         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
885                 if (update_bufs_pkts[i] > 0) {
886                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
887                                         update_bufs_pkts[i]);
888                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
889                                 rte_pktmbuf_free(update_bufs[i][j]);
890                         }
891 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
892                         for (j = 0; j < update_bufs_pkts[i]; j++) {
893                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
894                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
895                         }
896 #endif
897                 }
898         }
899
900         /* Send non-ARP packets using tlb policy */
901         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
902                 num_send = bond_ethdev_tx_burst_tlb(queue,
903                                 slave_bufs[RTE_MAX_ETHPORTS],
904                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
905
906                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
907                         bufs[nb_pkts - 1 - num_not_send - j] =
908                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
909                 }
910
911                 num_tx_total += num_send;
912         }
913
914         return num_tx_total;
915 }
916
917 static uint16_t
918 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
919                 uint16_t nb_pkts)
920 {
921         struct bond_dev_private *internals;
922         struct bond_tx_queue *bd_tx_q;
923
924         uint8_t num_of_slaves;
925         uint8_t slaves[RTE_MAX_ETHPORTS];
926
927         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
928
929         int i, op_slave_id;
930
931         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
932         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
933
934         bd_tx_q = (struct bond_tx_queue *)queue;
935         internals = bd_tx_q->dev_private;
936
937         /* Copy slave list to protect against slave up/down changes during tx
938          * bursting */
939         num_of_slaves = internals->active_slave_count;
940         memcpy(slaves, internals->active_slaves,
941                         sizeof(internals->active_slaves[0]) * num_of_slaves);
942
943         if (num_of_slaves < 1)
944                 return num_tx_total;
945
946         /* Populate each slave's mbuf array with the packets to be sent on it */
947         for (i = 0; i < nb_pkts; i++) {
948                 /* Select output slave using hash based on xmit policy */
949                 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
950
951                 /* Populate slave mbuf arrays with mbufs for that slave */
952                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
953         }
954
955         /* Send packet burst on each slave device */
956         for (i = 0; i < num_of_slaves; i++) {
957                 if (slave_nb_pkts[i] > 0) {
958                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959                                         slave_bufs[i], slave_nb_pkts[i]);
960
961                         /* if tx burst fails move packets to end of bufs */
962                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
963                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
964
965                                 tx_fail_total += slave_tx_fail_count;
966                                 memcpy(&bufs[nb_pkts - tx_fail_total],
967                                                 &slave_bufs[i][num_tx_slave],
968                                                 slave_tx_fail_count * sizeof(bufs[0]));
969                         }
970
971                         num_tx_total += num_tx_slave;
972                 }
973         }
974
975         return num_tx_total;
976 }
977
978 static uint16_t
979 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
980                 uint16_t nb_pkts)
981 {
982         struct bond_dev_private *internals;
983         struct bond_tx_queue *bd_tx_q;
984
985         uint8_t num_of_slaves;
986         uint8_t slaves[RTE_MAX_ETHPORTS];
987         /* Positions in the slaves array, not port IDs */
988         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
989         uint8_t distributing_count;
990
991         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
992         uint16_t i, j, op_slave_idx;
993         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
994
995         /* Allocate room for additional slow-protocol packets in 802.3ad mode. */
996         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
997         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
998
999         /* Total amount of packets in slave_bufs */
1000         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1001         /* Slow packets placed in each slave */
1002         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1003
1004         bd_tx_q = (struct bond_tx_queue *)queue;
1005         internals = bd_tx_q->dev_private;
1006
1007         /* Copy slave list to protect against slave up/down changes during tx
1008          * bursting */
1009         num_of_slaves = internals->active_slave_count;
1010         if (num_of_slaves < 1)
1011                 return num_tx_total;
1012
1013         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1014
1015         distributing_count = 0;
1016         for (i = 0; i < num_of_slaves; i++) {
1017                 struct port *port = &mode_8023ad_ports[slaves[i]];
1018
1019                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1020                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
1021                                 NULL);
1022                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1023
1024                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1025                         slave_bufs[i][j] = slow_pkts[j];
1026
1027                 if (ACTOR_STATE(port, DISTRIBUTING))
1028                         distributing_offsets[distributing_count++] = i;
1029         }
1030
1031         if (likely(distributing_count > 0)) {
1032                 /* Populate each slave's mbuf array with the packets to be sent on it */
1033                 for (i = 0; i < nb_pkts; i++) {
1034                         /* Select output slave using hash based on xmit policy */
1035                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1036
1037                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
1038                          * slaves that are currently distributing. */
1039                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
1040                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1041                         slave_nb_pkts[slave_offset]++;
1042                 }
1043         }
1044
1045         /* Send packet burst on each slave device */
1046         for (i = 0; i < num_of_slaves; i++) {
1047                 if (slave_nb_pkts[i] == 0)
1048                         continue;
1049
1050                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1051                                 slave_bufs[i], slave_nb_pkts[i]);
1052
1053                 /* If tx burst fails drop slow packets */
1054                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1055                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1056
1057                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1058                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1059
1060                 /* If tx burst fails move packets to end of bufs */
1061                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1062                         uint16_t j = nb_pkts - num_tx_fail_total;
1063                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1064                                 bufs[j] = slave_bufs[i][num_tx_slave];
1065                 }
1066         }
1067
1068         return num_tx_total;
1069 }
1070
1071 static uint16_t
1072 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1073                 uint16_t nb_pkts)
1074 {
1075         struct bond_dev_private *internals;
1076         struct bond_tx_queue *bd_tx_q;
1077
1078         uint8_t tx_failed_flag = 0, num_of_slaves;
1079         uint8_t slaves[RTE_MAX_ETHPORTS];
1080
1081         uint16_t max_nb_of_tx_pkts = 0;
1082
1083         int slave_tx_total[RTE_MAX_ETHPORTS];
1084         int i, most_successful_tx_slave = -1;
1085
1086         bd_tx_q = (struct bond_tx_queue *)queue;
1087         internals = bd_tx_q->dev_private;
1088
1089         /* Copy slave list to protect against slave up/down changes during tx
1090          * bursting */
1091         num_of_slaves = internals->active_slave_count;
1092         memcpy(slaves, internals->active_slaves,
1093                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1094
1095         if (num_of_slaves < 1)
1096                 return 0;
1097
1098         /* Increment reference count on mbufs */
1099         for (i = 0; i < nb_pkts; i++)
1100                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1101
1102         /* Transmit burst on each active slave */
1103         for (i = 0; i < num_of_slaves; i++) {
1104                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1105                                         bufs, nb_pkts);
1106
1107                 if (unlikely(slave_tx_total[i] < nb_pkts))
1108                         tx_failed_flag = 1;
1109
1110                 /* record the value and slave index for the slave which transmits the
1111                  * maximum number of packets */
1112                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1113                         max_nb_of_tx_pkts = slave_tx_total[i];
1114                         most_successful_tx_slave = i;
1115                 }
1116         }
1117
1118         /* if slaves fail to transmit packets from burst, the calling application
1119          * is not expected to know about multiple references to packets so we must
1120          * handle failures of all packets except those of the most successful slave
1121          */
1122         if (unlikely(tx_failed_flag))
1123                 for (i = 0; i < num_of_slaves; i++)
1124                         if (i != most_successful_tx_slave)
1125                                 while (slave_tx_total[i] < nb_pkts)
1126                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1127
1128         return max_nb_of_tx_pkts;
1129 }
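
/*
 * Illustrative sketch of the refcount convention used above: each slave
 * transmit consumes one mbuf reference, so sharing one packet across n
 * slaves needs n - 1 extra references. The helper name is hypothetical.
 */
static inline void
example_share_mbuf(struct rte_mbuf *m, uint8_t n_slaves)
{
	/* Caller already holds one reference; add one per additional slave. */
	rte_mbuf_refcnt_update(m, n_slaves - 1);
}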
1130
1131 void
1132 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1133                 struct rte_eth_link *slave_dev_link)
1134 {
1135         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1136         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1137
1138         if (slave_dev_link->link_status &&
1139                 bonded_eth_dev->data->dev_started) {
1140                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1141                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
1142
1143                 internals->link_props_set = 1;
1144         }
1145 }
1146
1147 void
1148 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1149 {
1150         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1151
1152         memset(&(bonded_eth_dev->data->dev_link), 0,
1153                         sizeof(bonded_eth_dev->data->dev_link));
1154
1155         internals->link_props_set = 0;
1156 }
1157
1158 int
1159 link_properties_valid(struct rte_eth_link *bonded_dev_link,
1160                 struct rte_eth_link *slave_dev_link)
1161 {
1162         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1163                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
1164                 return -1;
1165
1166         return 0;
1167 }
1168
1169 int
1170 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1171 {
1172         struct ether_addr *mac_addr;
1173
1174         if (eth_dev == NULL) {
1175                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1176                 return -1;
1177         }
1178
1179         if (dst_mac_addr == NULL) {
1180                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1181                 return -1;
1182         }
1183
1184         mac_addr = eth_dev->data->mac_addrs;
1185
1186         ether_addr_copy(mac_addr, dst_mac_addr);
1187         return 0;
1188 }
1189
1190 int
1191 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1192 {
1193         struct ether_addr *mac_addr;
1194
1195         if (eth_dev == NULL) {
1196                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1197                 return -1;
1198         }
1199
1200         if (new_mac_addr == NULL) {
1201                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1202                 return -1;
1203         }
1204
1205         mac_addr = eth_dev->data->mac_addrs;
1206
1207         /* If the new MAC is different from the current MAC then update */
1208         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1209                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1210
1211         return 0;
1212 }
1213
1214 int
1215 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1216 {
1217         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1218         int i;
1219
1220         /* Update slave devices MAC addresses */
1221         if (internals->slave_count < 1)
1222                 return -1;
1223
1224         switch (internals->mode) {
1225         case BONDING_MODE_ROUND_ROBIN:
1226         case BONDING_MODE_BALANCE:
1227         case BONDING_MODE_BROADCAST:
1228                 for (i = 0; i < internals->slave_count; i++) {
1229                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1230                                         bonded_eth_dev->data->mac_addrs)) {
1231                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1232                                                 internals->slaves[i].port_id);
1233                                 return -1;
1234                         }
1235                 }
1236                 break;
1237         case BONDING_MODE_8023AD:
1238                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1239                 break;
1240         case BONDING_MODE_ACTIVE_BACKUP:
1241         case BONDING_MODE_TLB:
1242         case BONDING_MODE_ALB:
1243         default:
1244                 for (i = 0; i < internals->slave_count; i++) {
1245                         if (internals->slaves[i].port_id ==
1246                                         internals->current_primary_port) {
1247                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1248                                                 bonded_eth_dev->data->mac_addrs)) {
1249                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1250                                                         internals->current_primary_port);
1251                                         return -1;
1252                                 }
1253                         } else {
1254                                 if (mac_address_set(
1255                                                 &rte_eth_devices[internals->slaves[i].port_id],
1256                                                 &internals->slaves[i].persisted_mac_addr)) {
1257                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1258                                                         internals->slaves[i].port_id);
1259                                         return -1;
1260                                 }
1261                         }
1262                 }
1263         }
1264
1265         return 0;
1266 }
1267
1268 int
1269 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1270 {
1271         struct bond_dev_private *internals;
1272
1273         internals = eth_dev->data->dev_private;
1274
1275         switch (mode) {
1276         case BONDING_MODE_ROUND_ROBIN:
1277                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1278                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1279                 break;
1280         case BONDING_MODE_ACTIVE_BACKUP:
1281                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1282                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1283                 break;
1284         case BONDING_MODE_BALANCE:
1285                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1286                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1287                 break;
1288         case BONDING_MODE_BROADCAST:
1289                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1290                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1291                 break;
1292         case BONDING_MODE_8023AD:
1293                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1294                         return -1;
1295
1296                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1297                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1298                 RTE_LOG(WARNING, PMD,
1299                                 "Using mode 4, TX and RX bursts must be invoked "
1300                                 "at least every 100ms.\n");
1301                 break;
1302         case BONDING_MODE_TLB:
1303                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1304                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1305                 break;
1306         case BONDING_MODE_ALB:
1307                 if (bond_mode_alb_enable(eth_dev) != 0)
1308                         return -1;
1309
1310                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1311                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1312                 break;
1313         default:
1314                 return -1;
1315         }
1316
1317         internals->mode = mode;
1318
1319         return 0;
1320 }
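
/*
 * Hypothetical control-path sketch using the public API from
 * rte_eth_bond.h: create a bonded device in balance mode and pick the
 * layer 2+3 transmit policy, which makes bond_ethdev_mode_set() install
 * bond_ethdev_tx_burst_balance() with xmit_l23_hash(). The device name
 * and socket id are assumptions.
 */
static int
example_create_balance_bond(void)
{
	int port_id = rte_eth_bond_create("net_bonding0", BONDING_MODE_BALANCE,
			0 /* socket id */);

	if (port_id < 0)
		return port_id;

	return rte_eth_bond_xmit_policy_set(port_id, BALANCE_XMIT_POLICY_LAYER23);
}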
1321
1322 int
1323 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1324                 struct rte_eth_dev *slave_eth_dev)
1325 {
1326         struct bond_rx_queue *bd_rx_q;
1327         struct bond_tx_queue *bd_tx_q;
1328
1329         int errval;
1330         uint16_t q_id;
1331
1332         /* Stop slave */
1333         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1334
1335         /* Enable interrupts on slave device if supported */
1336         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1337                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1338
1339         /* If RSS is enabled for bonding, try to enable it for slaves  */
1340         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1341                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1342                                 != 0) {
1343                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1344                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1345                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1346                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1347                 } else {
1348                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1349                 }
1350
1351                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1352                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1353                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1354                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1355         }
1356
1357         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1358                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1359
1360         /* Configure device */
1361         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1362                         bonded_eth_dev->data->nb_rx_queues,
1363                         bonded_eth_dev->data->nb_tx_queues,
1364                         &(slave_eth_dev->data->dev_conf));
1365         if (errval != 0) {
1366                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1367                                 slave_eth_dev->data->port_id, errval);
1368                 return errval;
1369         }
1370
1371         /* Setup Rx Queues */
1372         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1373                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1374
1375                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1376                                 bd_rx_q->nb_rx_desc,
1377                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1378                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1379                 if (errval != 0) {
1380                         RTE_BOND_LOG(ERR,
1381                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1382                                         slave_eth_dev->data->port_id, q_id, errval);
1383                         return errval;
1384                 }
1385         }
1386
1387         /* Setup Tx Queues */
1388         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1389                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1390
1391                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1392                                 bd_tx_q->nb_tx_desc,
1393                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1394                                 &bd_tx_q->tx_conf);
1395                 if (errval != 0) {
1396                         RTE_BOND_LOG(ERR,
1397                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1398                                         slave_eth_dev->data->port_id, q_id, errval);
1399                         return errval;
1400                 }
1401         }
1402
1403         /* Start device */
1404         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1405         if (errval != 0) {
1406                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1407                                 slave_eth_dev->data->port_id, errval);
1408                 return -1;
1409         }
1410
1411         /* If RSS is enabled for bonding, synchronize RETA */
1412         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1413                 int i;
1414                 struct bond_dev_private *internals;
1415
1416                 internals = bonded_eth_dev->data->dev_private;
1417
1418                 for (i = 0; i < internals->slave_count; i++) {
1419                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1420                                 errval = rte_eth_dev_rss_reta_update(
1421                                                 slave_eth_dev->data->port_id,
1422                                                 &internals->reta_conf[0],
1423                                                 internals->slaves[i].reta_size);
1424                                 if (errval != 0) {
1425                                         RTE_LOG(WARNING, PMD,
1426                                                         "rte_eth_dev_rss_reta_update on slave port %d failed (err %d)."
1427                                                         " RSS configuration for the bonded device may be inconsistent.\n",
1428                                                         slave_eth_dev->data->port_id, errval);
1429                                 }
1430                                 break;
1431                         }
1432                 }
1433         }
1434
1435         /* If the lsc interrupt is set, check the slave's initial link status */
1436         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1437                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1438                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1439
1440         return 0;
1441 }
1442
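/*
 * Remove a slave from the internals->slaves array by shifting the
 * following entries down one slot, then reset the slave's ethdev so it
 * must be reconfigured before reuse.
 */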
1443 void
1444 slave_remove(struct bond_dev_private *internals,
1445                 struct rte_eth_dev *slave_eth_dev)
1446 {
1447         uint8_t i;
1448
1449         for (i = 0; i < internals->slave_count; i++)
1450                 if (internals->slaves[i].port_id ==
1451                                 slave_eth_dev->data->port_id)
1452                         break;
1453
1454         if (i < (internals->slave_count - 1))
1455                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1456                                 sizeof(internals->slaves[0]) *
1457                                 (internals->slave_count - i - 1));
1458
1459         internals->slave_count--;
1460
1461         /* force reconfiguration of slave interfaces */
1462         _rte_eth_dev_reset(slave_eth_dev);
1463 }
1464
1465 static void
1466 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1467
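/*
 * Record a new slave in internals->slaves: remember its port id and
 * original MAC address, and flag it for link-status polling if it cannot
 * deliver LSC interrupts.
 */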
1468 void
1469 slave_add(struct bond_dev_private *internals,
1470                 struct rte_eth_dev *slave_eth_dev)
1471 {
1472         struct bond_slave_details *slave_details =
1473                         &internals->slaves[internals->slave_count];
1474
1475         slave_details->port_id = slave_eth_dev->data->port_id;
1476         slave_details->last_link_status = 0;
1477
1478         /* Mark slave devices that don't support interrupts so we can
1479          * compensate when we start the bond
1480          */
1481         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1482                 slave_details->link_status_poll_enabled = 1;
1483         }
1484
1485         slave_details->link_status_wait_to_complete = 0;
1486         /* Save the slave's original MAC address so it can be restored later */
1487         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1488                         sizeof(struct ether_addr));
1489 }
1490
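/*
 * Set the current primary port. The proposed port is accepted directly
 * when no slaves are active; otherwise it takes effect only if it is
 * found in the active slave list.
 */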
1491 void
1492 bond_ethdev_primary_set(struct bond_dev_private *internals,
1493                 uint8_t slave_port_id)
1494 {
1495         int i;
1496
1497         if (internals->active_slave_count < 1)
1498                 internals->current_primary_port = slave_port_id;
1499         else
1500                 /* Search bonded device slave ports for new proposed primary port */
1501                 for (i = 0; i < internals->active_slave_count; i++) {
1502                         if (internals->active_slaves[i] == slave_port_id)
1503                                 internals->current_primary_port = slave_port_id;
1504                 }
1505 }
1506
1507 static void
1508 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1509
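/*
 * Start the bonded device: apply the primary slave's MAC unless the user
 * supplied one, propagate the MAC and promiscuous settings to all slaves,
 * reconfigure every slave, arm the link-status polling alarm if any slave
 * needs it, and kick off mode-specific machinery (802.3ad, TLB/ALB).
 */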
1510 static int
1511 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1512 {
1513         struct bond_dev_private *internals;
1514         int i;
1515
1516         /* slave eth dev will be started by bonded device */
1517         if (check_for_bonded_ethdev(eth_dev)) {
1518                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1519                                 eth_dev->data->port_id);
1520                 return -1;
1521         }
1522
1523         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1524         eth_dev->data->dev_started = 1;
1525
1526         internals = eth_dev->data->dev_private;
1527
1528         if (internals->slave_count == 0) {
1529                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1530                 return -1;
1531         }
1532
1533         if (internals->user_defined_mac == 0) {
1534                 struct ether_addr *new_mac_addr = NULL;
1535
1536                 for (i = 0; i < internals->slave_count; i++)
1537                         if (internals->slaves[i].port_id == internals->primary_port)
1538                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1539
1540                 if (new_mac_addr == NULL)
1541                         return -1;
1542
1543                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1544                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1545                                         eth_dev->data->port_id);
1546                         return -1;
1547                 }
1548         }
1549
1550         /* Update all slave devices' MAC addresses */
1551         if (mac_address_slaves_update(eth_dev) != 0)
1552                 return -1;
1553
1554         /* If the bonded device is configured in promiscuous mode then re-apply it */
1555         if (internals->promiscuous_en)
1556                 bond_ethdev_promiscuous_enable(eth_dev);
1557
1558         /* Reconfigure each slave device if starting bonded device */
1559         for (i = 0; i < internals->slave_count; i++) {
1560                 if (slave_configure(eth_dev,
1561                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1562                         RTE_BOND_LOG(ERR,
1563                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1564                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1565                         return -1;
1566                 }
1567                 /* We will need to poll for link status if any slave doesn't
1568                  * support interrupts
1569                  */
1570                 if (internals->slaves[i].link_status_poll_enabled)
1571                         internals->link_status_polling_enabled = 1;
1572         }
1573         /* start polling if needed */
1574         if (internals->link_status_polling_enabled) {
1575                 rte_eal_alarm_set(
1576                         internals->link_status_polling_interval_ms * 1000,
1577                         bond_ethdev_slave_link_status_change_monitor,
1578                         (void *)&rte_eth_devices[internals->port_id]);
1579         }
1580
1581         if (internals->user_defined_primary_port)
1582                 bond_ethdev_primary_set(internals, internals->primary_port);
1583
1584         if (internals->mode == BONDING_MODE_8023AD)
1585                 bond_mode_8023ad_start(eth_dev);
1586
1587         if (internals->mode == BONDING_MODE_TLB ||
1588                         internals->mode == BONDING_MODE_ALB)
1589                 bond_tlb_enable(internals);
1590
1591         return 0;
1592 }
1593
1594 static void
1595 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1596 {
1597         uint8_t i;
1598
1599         if (dev->data->rx_queues != NULL) {
1600                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1601                         rte_free(dev->data->rx_queues[i]);
1602                         dev->data->rx_queues[i] = NULL;
1603                 }
1604                 dev->data->nb_rx_queues = 0;
1605         }
1606
1607         if (dev->data->tx_queues != NULL) {
1608                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1609                         rte_free(dev->data->tx_queues[i]);
1610                         dev->data->tx_queues[i] = NULL;
1611                 }
1612                 dev->data->nb_tx_queues = 0;
1613         }
1614 }
1615
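/*
 * Stop the bonded device: halt the 802.3ad state machines and drain their
 * per-slave RX/TX rings in mode 4, clear the TLB byte counters in modes
 * 5/6, then deactivate all slaves and mark the link down.
 */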
1616 void
1617 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1618 {
1619         struct bond_dev_private *internals = eth_dev->data->dev_private;
1620         uint8_t i;
1621
1622         if (internals->mode == BONDING_MODE_8023AD) {
1623                 struct port *port;
1624                 void *pkt = NULL;
1625
1626                 bond_mode_8023ad_stop(eth_dev);
1627
1628                 /* Discard all messages to/from mode 4 state machines */
1629                 for (i = 0; i < internals->active_slave_count; i++) {
1630                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1631
1632                         RTE_ASSERT(port->rx_ring != NULL);
1633                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1634                                 rte_pktmbuf_free(pkt);
1635
1636                         RTE_ASSERT(port->tx_ring != NULL);
1637                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1638                                 rte_pktmbuf_free(pkt);
1639                 }
1640         }
1641
1642         if (internals->mode == BONDING_MODE_TLB ||
1643                         internals->mode == BONDING_MODE_ALB) {
1644                 bond_tlb_disable(internals);
1645                 for (i = 0; i < internals->active_slave_count; i++)
1646                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1647         }
1648
1649         internals->active_slave_count = 0;
1650         internals->link_status_polling_enabled = 0;
1651         for (i = 0; i < internals->slave_count; i++)
1652                 internals->slaves[i].last_link_status = 0;
1653
1654         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1655         eth_dev->data->dev_started = 0;
1656 }
1657
1658 void
1659 bond_ethdev_close(struct rte_eth_dev *dev)
1660 {
1661         struct bond_dev_private *internals = dev->data->dev_private;
1662
1663         bond_ethdev_free_queues(dev);
1664         rte_bitmap_reset(internals->vlan_filter_bmp);
1665 }
1666
1667 /* forward declaration */
1668 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1669
1670 static void
1671 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1672 {
1673         struct bond_dev_private *internals = dev->data->dev_private;
1674
1675         dev_info->max_mac_addrs = 1;
1676
1677         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
1678                                   internals->candidate_max_rx_pktlen : 2048;
1679
1680         dev_info->max_rx_queues = (uint16_t)128;
1681         dev_info->max_tx_queues = (uint16_t)512;
1682
1683         dev_info->min_rx_bufsize = 0;
1684
1685         dev_info->rx_offload_capa = internals->rx_offload_capa;
1686         dev_info->tx_offload_capa = internals->tx_offload_capa;
1687         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1688
1689         dev_info->reta_size = internals->reta_size;
1690 }
1691
1692 static int
1693 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1694 {
1695         int res;
1696         uint8_t i;
1697         struct bond_dev_private *internals = dev->data->dev_private;
1698
1699         /* don't do this while a slave is being added */
1700         rte_spinlock_lock(&internals->lock);
1701
1702         if (on)
1703                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
1704         else
1705                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
1706
1707         for (i = 0; i < internals->slave_count; i++) {
1708                 uint8_t port_id = internals->slaves[i].port_id;
1709
1710                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
1711                 if (res == ENOTSUP)
1712                         RTE_LOG(WARNING, PMD,
1713                                 "Setting VLAN filter on slave port %u not supported.\n",
1714                                 port_id);
1715         }
1716
1717         rte_spinlock_unlock(&internals->lock);
1718         return 0;
1719 }
1720
1721 static int
1722 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1723                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1724                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1725 {
1726         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1727                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1728                                         0, dev->data->numa_node);
1729         if (bd_rx_q == NULL)
1730                 return -1;
1731
1732         bd_rx_q->queue_id = rx_queue_id;
1733         bd_rx_q->dev_private = dev->data->dev_private;
1734
1735         bd_rx_q->nb_rx_desc = nb_rx_desc;
1736
1737         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1738         bd_rx_q->mb_pool = mb_pool;
1739
1740         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1741
1742         return 0;
1743 }
1744
1745 static int
1746 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1747                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1748                 const struct rte_eth_txconf *tx_conf)
1749 {
1750         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1751                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1752                                         0, dev->data->numa_node);
1753
1754         if (bd_tx_q == NULL)
1755                 return -1;
1756
1757         bd_tx_q->queue_id = tx_queue_id;
1758         bd_tx_q->dev_private = dev->data->dev_private;
1759
1760         bd_tx_q->nb_tx_desc = nb_tx_desc;
1761         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1762
1763         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1764
1765         return 0;
1766 }
1767
1768 static void
1769 bond_ethdev_rx_queue_release(void *queue)
1770 {
1771         if (queue == NULL)
1772                 return;
1773
1774         rte_free(queue);
1775 }
1776
1777 static void
1778 bond_ethdev_tx_queue_release(void *queue)
1779 {
1780         if (queue == NULL)
1781                 return;
1782
1783         rte_free(queue);
1784 }
1785
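/*
 * Alarm callback that polls the link status of slaves which cannot raise
 * LSC interrupts, forwarding any change to the LSC event callback. It
 * re-arms itself every link_status_polling_interval_ms for as long as a
 * polling slave exists (or the lock could not be taken).
 */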
1786 static void
1787 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1788 {
1789         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1790         struct bond_dev_private *internals;
1791
1792         /* Default "polling slave found" to true, so that polling is not
1793          * disabled if we cannot take the lock */
1794         int i, polling_slave_found = 1;
1795
1796         if (cb_arg == NULL)
1797                 return;
1798
1799         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1800         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1801
1802         if (!bonded_ethdev->data->dev_started ||
1803                 !internals->link_status_polling_enabled)
1804                 return;
1805
1806         /* If the device is currently being configured then don't check the
1807          * slaves' link status; wait until the next period */
1808         if (rte_spinlock_trylock(&internals->lock)) {
1809                 if (internals->slave_count > 0)
1810                         polling_slave_found = 0;
1811
1812                 for (i = 0; i < internals->slave_count; i++) {
1813                         if (!internals->slaves[i].link_status_poll_enabled)
1814                                 continue;
1815
1816                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1817                         polling_slave_found = 1;
1818
1819                         /* Update slave link status */
1820                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1821                                         internals->slaves[i].link_status_wait_to_complete);
1822
1823                         /* if link status has changed since last checked then call lsc
1824                          * event callback */
1825                         if (slave_ethdev->data->dev_link.link_status !=
1826                                         internals->slaves[i].last_link_status) {
1827                                 internals->slaves[i].last_link_status =
1828                                                 slave_ethdev->data->dev_link.link_status;
1829
1830                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1831                                                 RTE_ETH_EVENT_INTR_LSC,
1832                                                 &bonded_ethdev->data->port_id);
1833                         }
1834                 }
1835                 rte_spinlock_unlock(&internals->lock);
1836         }
1837
1838         if (polling_slave_found)
1839                 /* Set alarm to continue monitoring link status of slave ethdevs */
1840                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1841                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1842 }
1843
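/* The bonded link is reported up if any active slave's link is up. */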
1844 static int
1845 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1846                 int wait_to_complete)
1847 {
1848         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1849
1850         if (!bonded_eth_dev->data->dev_started ||
1851                 internals->active_slave_count == 0) {
1852                 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1853                 return 0;
1854         } else {
1855                 struct rte_eth_dev *slave_eth_dev;
1856                 int i, link_up = 0;
1857
1858                 for (i = 0; i < internals->active_slave_count; i++) {
1859                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1860
1861                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1862                                         wait_to_complete);
1863                         if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
1864                                 link_up = 1;
1865                                 break;
1866                         }
1867                 }
1868
1869                 bonded_eth_dev->data->dev_link.link_status = link_up;
1870         }
1871
1872         return 0;
1873 }
1874
1875 static void
1876 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1877 {
1878         struct bond_dev_private *internals = dev->data->dev_private;
1879         struct rte_eth_stats slave_stats;
1880         int i, j;
1881
1882         for (i = 0; i < internals->slave_count; i++) {
1883                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1884
1885                 stats->ipackets += slave_stats.ipackets;
1886                 stats->opackets += slave_stats.opackets;
1887                 stats->ibytes += slave_stats.ibytes;
1888                 stats->obytes += slave_stats.obytes;
1889                 stats->imissed += slave_stats.imissed;
1890                 stats->ierrors += slave_stats.ierrors;
1891                 stats->oerrors += slave_stats.oerrors;
1892                 stats->rx_nombuf += slave_stats.rx_nombuf;
1893
1894                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1895                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1896                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1897                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1898                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1899                         stats->q_errors[j] += slave_stats.q_errors[j];
1900                 }
1901
1902         }
1903 }
1904
1905 static void
1906 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1907 {
1908         struct bond_dev_private *internals = dev->data->dev_private;
1909         int i;
1910
1911         for (i = 0; i < internals->slave_count; i++)
1912                 rte_eth_stats_reset(internals->slaves[i].port_id);
1913 }
1914
1915 static void
1916 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1917 {
1918         struct bond_dev_private *internals = eth_dev->data->dev_private;
1919         int i;
1920
1921         internals->promiscuous_en = 1;
1922
1923         switch (internals->mode) {
1924         /* Promiscuous mode is propagated to all slaves */
1925         case BONDING_MODE_ROUND_ROBIN:
1926         case BONDING_MODE_BALANCE:
1927         case BONDING_MODE_BROADCAST:
1928                 for (i = 0; i < internals->slave_count; i++)
1929                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1930                 break;
1931         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1932         case BONDING_MODE_8023AD:
1933                 break;
1934         /* Promiscuous mode is propagated only to primary slave */
1935         case BONDING_MODE_ACTIVE_BACKUP:
1936         case BONDING_MODE_TLB:
1937         case BONDING_MODE_ALB:
1938         default:
1939                 rte_eth_promiscuous_enable(internals->current_primary_port);
1940         }
1941 }
1942
1943 static void
1944 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1945 {
1946         struct bond_dev_private *internals = dev->data->dev_private;
1947         int i;
1948
1949         internals->promiscuous_en = 0;
1950
1951         switch (internals->mode) {
1952         /* Promiscuous mode is propagated to all slaves */
1953         case BONDING_MODE_ROUND_ROBIN:
1954         case BONDING_MODE_BALANCE:
1955         case BONDING_MODE_BROADCAST:
1956                 for (i = 0; i < internals->slave_count; i++)
1957                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1958                 break;
1959         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1960         case BONDING_MODE_8023AD:
1961                 break;
1962         /* Promiscuous mode is propagated only to primary slave */
1963         case BONDING_MODE_ACTIVE_BACKUP:
1964         case BONDING_MODE_TLB:
1965         case BONDING_MODE_ALB:
1966         default:
1967                 rte_eth_promiscuous_disable(internals->current_primary_port);
1968         }
1969 }
1970
1971 static void
1972 bond_ethdev_delayed_lsc_propagation(void *arg)
1973 {
1974         if (arg == NULL)
1975                 return;
1976
1977         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1978                         RTE_ETH_EVENT_INTR_LSC, NULL);
1979 }
1980
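/*
 * Handle a slave's link-state-change event: activate or deactivate the
 * slave, adjust the primary port and bonded link status accordingly, and
 * propagate the LSC event upwards, optionally deferred by the configured
 * up/down delays via an EAL alarm.
 */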
1981 void
1982 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1983                 void *param)
1984 {
1985         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1986         struct bond_dev_private *internals;
1987         struct rte_eth_link link;
1988
1989         int i, valid_slave = 0;
1990         uint8_t active_pos;
1991         uint8_t lsc_flag = 0;
1992
1993         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1994                 return;
1995
1996         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1997         slave_eth_dev = &rte_eth_devices[port_id];
1998
1999         if (check_for_bonded_ethdev(bonded_eth_dev))
2000                 return;
2001
2002         internals = bonded_eth_dev->data->dev_private;
2003
2004         /* If the device isn't started don't handle interrupts */
2005         if (!bonded_eth_dev->data->dev_started)
2006                 return;
2007
2008         /* verify that port_id is a valid slave of bonded port */
2009         for (i = 0; i < internals->slave_count; i++) {
2010                 if (internals->slaves[i].port_id == port_id) {
2011                         valid_slave = 1;
2012                         break;
2013                 }
2014         }
2015
2016         if (!valid_slave)
2017                 return;
2018
2019         /* Search for port in active port list */
2020         active_pos = find_slave_by_id(internals->active_slaves,
2021                         internals->active_slave_count, port_id);
2022
2023         rte_eth_link_get_nowait(port_id, &link);
2024         if (link.link_status) {
2025                 if (active_pos < internals->active_slave_count)
2026                         return;
2027
2028                 /* If there are no active slave ports then make this the primary port */
2029                 if (internals->active_slave_count < 1) {
2030                         /* If first active slave, then change link status */
2031                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2032                         internals->current_primary_port = port_id;
2033                         lsc_flag = 1;
2034
2035                         mac_address_slaves_update(bonded_eth_dev);
2036
2037                         /* Inherit eth dev link properties from first active slave */
2038                         link_properties_set(bonded_eth_dev,
2039                                         &(slave_eth_dev->data->dev_link));
2040                 } else {
2041                         if (link_properties_valid(
2042                                 &bonded_eth_dev->data->dev_link, &link) != 0) {
2043                                 slave_eth_dev->data->dev_flags &=
2044                                         (~RTE_ETH_DEV_BONDED_SLAVE);
2045                                 RTE_LOG(ERR, PMD,
2046                                         "port %u invalid speed/duplex\n",
2047                                         port_id);
2048                                 return;
2049                         }
2050                 }
2051
2052                 activate_slave(bonded_eth_dev, port_id);
2053
2054                 /* If user has defined the primary port then default to using it */
2055                 if (internals->user_defined_primary_port &&
2056                                 internals->primary_port == port_id)
2057                         bond_ethdev_primary_set(internals, port_id);
2058         } else {
2059                 if (active_pos == internals->active_slave_count)
2060                         return;
2061
2062                 /* Remove from active slave list */
2063                 deactivate_slave(bonded_eth_dev, port_id);
2064
2065                 /* No active slaves, change link status to down and reset other
2066                  * link properties */
2067                 if (internals->active_slave_count < 1) {
2068                         lsc_flag = 1;
2069                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2070
2071                         link_properties_reset(bonded_eth_dev);
2072                 }
2073
2074                 /* Update primary id: take the first active slave from the list, or
2075                  * fall back to the configured primary port if none are active */
2076                 if (port_id == internals->current_primary_port) {
2077                         if (internals->active_slave_count > 0)
2078                                 bond_ethdev_primary_set(internals,
2079                                                 internals->active_slaves[0]);
2080                         else
2081                                 internals->current_primary_port = internals->primary_port;
2082                 }
2083         }
2084
2085         if (lsc_flag) {
2086                 /* Cancel any possible outstanding interrupts if delays are enabled */
2087                 if (internals->link_up_delay_ms > 0 ||
2088                         internals->link_down_delay_ms > 0)
2089                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2090                                         bonded_eth_dev);
2091
2092                 if (bonded_eth_dev->data->dev_link.link_status) {
2093                         if (internals->link_up_delay_ms > 0)
2094                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2095                                                 bond_ethdev_delayed_lsc_propagation,
2096                                                 (void *)bonded_eth_dev);
2097                         else
2098                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2099                                                 RTE_ETH_EVENT_INTR_LSC, NULL);
2100
2101                 } else {
2102                         if (internals->link_down_delay_ms > 0)
2103                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2104                                                 bond_ethdev_delayed_lsc_propagation,
2105                                                 (void *)bonded_eth_dev);
2106                         else
2107                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2108                                                 RTE_ETH_EVENT_INTR_LSC, NULL);
2109                 }
2110         }
2111 }
2112
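/*
 * Accept a new RETA for the bonded device, replicate it across the whole
 * internal table, and push it to every slave using each slave's own
 * RETA size.
 */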
2113 static int
2114 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2115                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2116 {
2117         unsigned i, j;
2118         int result = 0;
2119         int slave_reta_size;
2120         unsigned reta_count;
2121         struct bond_dev_private *internals = dev->data->dev_private;
2122
2123         if (reta_size != internals->reta_size)
2124                 return -EINVAL;
2125
2126         /* Copy RETA table */
2127         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2128
2129         for (i = 0; i < reta_count; i++) {
2130                 internals->reta_conf[i].mask = reta_conf[i].mask;
2131                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2132                         if ((reta_conf[i].mask >> j) & 0x01)
2133                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2134         }
2135
2136         /* Fill rest of array */
2137         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2138                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2139                                 sizeof(internals->reta_conf[0]) * reta_count);
2140
2141         /* Propagate RETA over slaves */
2142         for (i = 0; i < internals->slave_count; i++) {
2143                 slave_reta_size = internals->slaves[i].reta_size;
2144                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2145                                 &internals->reta_conf[0], slave_reta_size);
2146                 if (result < 0)
2147                         return result;
2148         }
2149
2150         return 0;
2151 }
2152
2153 static int
2154 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2155                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2156 {
2157         int i, j;
2158         struct bond_dev_private *internals = dev->data->dev_private;
2159
2160         if (reta_size != internals->reta_size)
2161                 return -EINVAL;
2162
2163         /* Copy RETA table */
2164         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2165                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2166                         if ((reta_conf[i].mask >> j) & 0x01)
2167                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2168
2169         return 0;
2170 }
2171
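/*
 * Update the RSS hash configuration: mask the requested hash functions by
 * those the bonded device supports, store the key (defaulting its length
 * to 40 bytes), and propagate the result to every slave.
 */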
2172 static int
2173 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2174                 struct rte_eth_rss_conf *rss_conf)
2175 {
2176         int i, result = 0;
2177         struct bond_dev_private *internals = dev->data->dev_private;
2178         struct rte_eth_rss_conf bond_rss_conf;
2179
2180         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2181
2182         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2183
2184         if (bond_rss_conf.rss_hf != 0)
2185                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2186
2187         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2188                         sizeof(internals->rss_key)) {
2189                 if (bond_rss_conf.rss_key_len == 0)
2190                         bond_rss_conf.rss_key_len = 40;
2191                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2192                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2193                                 internals->rss_key_len);
2194         }
2195
2196         for (i = 0; i < internals->slave_count; i++) {
2197                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2198                                 &bond_rss_conf);
2199                 if (result < 0)
2200                         return result;
2201         }
2202
2203         return 0;
2204 }
2205
2206 static int
2207 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2208                 struct rte_eth_rss_conf *rss_conf)
2209 {
2210         struct bond_dev_private *internals = dev->data->dev_private;
2211
2212         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2213         rss_conf->rss_key_len = internals->rss_key_len;
2214         if (rss_conf->rss_key)
2215                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2216
2217         return 0;
2218 }
2219
2220 const struct eth_dev_ops default_dev_ops = {
2221         .dev_start            = bond_ethdev_start,
2222         .dev_stop             = bond_ethdev_stop,
2223         .dev_close            = bond_ethdev_close,
2224         .dev_configure        = bond_ethdev_configure,
2225         .dev_infos_get        = bond_ethdev_info,
2226         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2227         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2228         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2229         .rx_queue_release     = bond_ethdev_rx_queue_release,
2230         .tx_queue_release     = bond_ethdev_tx_queue_release,
2231         .link_update          = bond_ethdev_link_update,
2232         .stats_get            = bond_ethdev_stats_get,
2233         .stats_reset          = bond_ethdev_stats_reset,
2234         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2235         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2236         .reta_update          = bond_ethdev_rss_reta_update,
2237         .reta_query           = bond_ethdev_rss_reta_query,
2238         .rss_hash_update      = bond_ethdev_rss_hash_update,
2239         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2240 };
2241
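/*
 * Probe entry point for the bonding vdev: parse the mandatory "mode" and
 * optional "socket_id" kvargs, create the bonded ethdev, and stash the
 * remaining kvargs so bond_ethdev_configure() can apply them later.
 */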
2242 static int
2243 bond_probe(struct rte_vdev_device *dev)
2244 {
2245         const char *name;
2246         struct bond_dev_private *internals;
2247         struct rte_kvargs *kvlist;
2248         uint8_t bonding_mode, socket_id;
2249         int  arg_count, port_id;
2250
2251         name = rte_vdev_device_name(dev);
2252         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2253
2254         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2255                 pmd_bond_init_valid_arguments);
2256         if (kvlist == NULL)
2257                 return -1;
2258
2259         /* Parse link bonding mode */
2260         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2261                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2262                                 &bond_ethdev_parse_slave_mode_kvarg,
2263                                 &bonding_mode) != 0) {
2264                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2265                                         name);
2266                         goto parse_error;
2267                 }
2268         } else {
2269                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2270                                 "device %s\n", name);
2271                 goto parse_error;
2272         }
2273
2274         /* Parse socket id to create bonding device on */
2275         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2276         if (arg_count == 1) {
2277                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2278                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2279                                 != 0) {
2280                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2281                                         "bonded device %s\n", name);
2282                         goto parse_error;
2283                 }
2284         } else if (arg_count > 1) {
2285                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2286                                 "bonded device %s\n", name);
2287                 goto parse_error;
2288         } else {
2289                 socket_id = rte_socket_id();
2290         }
2291
2292         /* Create link bonding eth device */
2293         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2294         if (port_id < 0) {
2295                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2296                                 "socket %u.\n", name, bonding_mode, socket_id);
2297                 goto parse_error;
2298         }
2299         internals = rte_eth_devices[port_id].data->dev_private;
2300         internals->kvlist = kvlist;
2301
2302         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2303                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2304         return 0;
2305
2306 parse_error:
2307         rte_kvargs_free(kvlist);
2308
2309         return -1;
2310 }
2311
2312 static int
2313 bond_remove(struct rte_vdev_device *dev)
2314 {
2315         const char *name;
2316         int  ret;
2317
2318         if (!dev)
2319                 return -EINVAL;
2320
2321         name = rte_vdev_device_name(dev);
2322         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2323
2324         /* free link bonding eth device */
2325         ret = rte_eth_bond_free(name);
2326         if (ret < 0)
2327                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2328
2329         return ret;
2330 }
2331
2332 /* This resolves the slave port ids once all the other pdevs and vdevs
2333  * have been allocated */
2334 static int
2335 bond_ethdev_configure(struct rte_eth_dev *dev)
2336 {
2337         char *name = dev->data->name;
2338         struct bond_dev_private *internals = dev->data->dev_private;
2339         struct rte_kvargs *kvlist = internals->kvlist;
2340         int arg_count;
2341         uint8_t port_id = dev - rte_eth_devices;
2342
2343         static const uint8_t default_rss_key[40] = {
2344                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2345                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2346                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2347                 0xBE, 0xAC, 0x01, 0xFA
2348         };
2349
2350         unsigned i, j;
2351
2352         /* If RSS is enabled, fill table and key with default values */
2353         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2354                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2355                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2356                 memcpy(internals->rss_key, default_rss_key, 40);
2357
2358                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2359                         internals->reta_conf[i].mask = ~0LL;
2360                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2361                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2362                 }
2363         }
2364
2365         /* set the max_rx_pktlen */
2366         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2367
2368         /*
2369          * if no kvlist, it means that this bonded device has been created
2370          * through the bonding api.
2371          */
2372         if (!kvlist)
2373                 return 0;
2374
2375         /* Parse MAC address for bonded device */
2376         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2377         if (arg_count == 1) {
2378                 struct ether_addr bond_mac;
2379
2380                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2381                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2382                         RTE_LOG(INFO, EAL, "Invalid MAC address for bonded device %s\n",
2383                                         name);
2384                         return -1;
2385                 }
2386
2387                 /* Set MAC address */
2388                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2389                         RTE_LOG(ERR, EAL,
2390                                         "Failed to set MAC address on bonded device %s\n",
2391                                         name);
2392                         return -1;
2393                 }
2394         } else if (arg_count > 1) {
2395                 RTE_LOG(ERR, EAL,
2396                                 "MAC address can be specified only once for bonded device %s\n",
2397                                 name);
2398                 return -1;
2399         }
2400
2401         /* Parse/set balance mode transmit policy */
2402         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2403         if (arg_count == 1) {
2404                 uint8_t xmit_policy;
2405
2406                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2407                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2408                                                 0) {
2409                         RTE_LOG(INFO, EAL,
2410                                         "Invalid xmit policy specified for bonded device %s\n",
2411                                         name);
2412                         return -1;
2413                 }
2414
2415                 /* Set balance mode transmit policy */
2416                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2417                         RTE_LOG(ERR, EAL,
2418                                         "Failed to set balance xmit policy on bonded device %s\n",
2419                                         name);
2420                         return -1;
2421                 }
2422         } else if (arg_count > 1) {
2423                 RTE_LOG(ERR, EAL,
2424                                 "Transmit policy can be specified only once for bonded device"
2425                                 " %s\n", name);
2426                 return -1;
2427         }
2428
2429         /* Parse/add slave ports to bonded device */
2430         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2431                 struct bond_ethdev_slave_ports slave_ports;
2432                 unsigned i;
2433
2434                 memset(&slave_ports, 0, sizeof(slave_ports));
2435
2436                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2437                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2438                         RTE_LOG(ERR, EAL,
2439                                         "Failed to parse slave ports for bonded device %s\n",
2440                                         name);
2441                         return -1;
2442                 }
2443
2444                 for (i = 0; i < slave_ports.slave_count; i++) {
2445                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2446                                 RTE_LOG(ERR, EAL,
2447                                                 "Failed to add port %d as slave to bonded device %s\n",
2448                                                 slave_ports.slaves[i], name);
2449                         }
2450                 }
2451
2452         } else {
2453                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2454                 return -1;
2455         }
2456
2457         /* Parse/set primary slave port id*/
2458         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2459         if (arg_count == 1) {
2460                 uint8_t primary_slave_port_id;
2461
2462                 if (rte_kvargs_process(kvlist,
2463                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2464                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2465                                 &primary_slave_port_id) < 0) {
2466                         RTE_LOG(INFO, EAL,
2467                                         "Invalid primary slave port id specified for bonded device"
2468                                         " %s\n", name);
2469                         return -1;
2470                 }
2471
2472                 /* Set primary slave port id */
2473                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2474                                 != 0) {
2475                         RTE_LOG(ERR, EAL,
2476                                         "Failed to set primary slave port %d on bonded device %s\n",
2477                                         primary_slave_port_id, name);
2478                         return -1;
2479                 }
2480         } else if (arg_count > 1) {
2481                 RTE_LOG(INFO, EAL,
2482                                 "Primary slave can be specified only once for bonded device"
2483                                 " %s\n", name);
2484                 return -1;
2485         }
2486
2487         /* Parse link status monitor polling interval */
2488         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2489         if (arg_count == 1) {
2490                 uint32_t lsc_poll_interval_ms;
2491
2492                 if (rte_kvargs_process(kvlist,
2493                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2494                                 &bond_ethdev_parse_time_ms_kvarg,
2495                                 &lsc_poll_interval_ms) < 0) {
2496                         RTE_LOG(INFO, EAL,
2497                                         "Invalid lsc polling interval value specified for bonded"
2498                                         " device %s\n", name);
2499                         return -1;
2500                 }
2501
2502                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2503                                 != 0) {
2504                         RTE_LOG(ERR, EAL,
2505                                         "Failed to set lsc monitor polling interval (%u ms) on"
2506                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2507                         return -1;
2508                 }
2509         } else if (arg_count > 1) {
2510                 RTE_LOG(INFO, EAL,
2511                                 "LSC polling interval can be specified only once for bonded"
2512                                 " device %s\n", name);
2513                 return -1;
2514         }
2515
2516         /* Parse link up interrupt propagation delay */
2517         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2518         if (arg_count == 1) {
2519                 uint32_t link_up_delay_ms;
2520
2521                 if (rte_kvargs_process(kvlist,
2522                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2523                                 &bond_ethdev_parse_time_ms_kvarg,
2524                                 &link_up_delay_ms) < 0) {
2525                         RTE_LOG(INFO, EAL,
2526                                         "Invalid link up propagation delay value specified for"
2527                                         " bonded device %s\n", name);
2528                         return -1;
2529                 }
2530
2531                 /* Set link up propagation delay */
2532                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2533                                 != 0) {
2534                         RTE_LOG(ERR, EAL,
2535                                         "Failed to set link up propagation delay (%u ms) on bonded"
2536                                         " device %s\n", link_up_delay_ms, name);
2537                         return -1;
2538                 }
2539         } else if (arg_count > 1) {
2540                 RTE_LOG(INFO, EAL,
2541                                 "Link up propagation delay can be specified only once for"
2542                                 " bonded device %s\n", name);
2543                 return -1;
2544         }
2545
2546         /* Parse link down interrupt propagation delay */
2547         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2548         if (arg_count == 1) {
2549                 uint32_t link_down_delay_ms;
2550
2551                 if (rte_kvargs_process(kvlist,
2552                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2553                                 &bond_ethdev_parse_time_ms_kvarg,
2554                                 &link_down_delay_ms) < 0) {
2555                         RTE_LOG(INFO, EAL,
2556                                         "Invalid link down propagation delay value specified for"
2557                                         " bonded device %s\n", name);
2558                         return -1;
2559                 }
2560
2561                 /* Set link down propagation delay */
2562                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2563                                 != 0) {
2564                         RTE_LOG(ERR, EAL,
2565                                         "Failed to set link down propagation delay (%u ms) on"
2566                                         " bonded device %s\n", link_down_delay_ms, name);
2567                         return -1;
2568                 }
2569         } else if (arg_count > 1) {
2570                 RTE_LOG(INFO, EAL,
2571                                 "Link down propagation delay can be specified only once for"
2572                                 " bonded device %s\n", name);
2573                 return -1;
2574         }
2575
2576         return 0;
2577 }
2578
2579 struct rte_vdev_driver pmd_bond_drv = {
2580         .probe = bond_probe,
2581         .remove = bond_remove,
2582 };
2583
2584 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
2585 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
2586
2587 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
2588         "slave=<ifc> "
2589         "primary=<ifc> "
2590         "mode=[0-6] "
2591         "xmit_policy=[l2 | l23 | l34] "
2592         "socket_id=<int> "
2593         "mac=<mac addr> "
2594         "lsc_poll_period_ms=<int> "
2595         "up_delay=<int> "
2596         "down_delay=<int>");
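
/*
 * Illustrative usage (hypothetical port ids/devargs; adjust to the actual
 * deployment): create an active-backup (mode 1) bond over two slave ports
 * with port 0 as primary:
 *
 *   --vdev 'net_bonding0,mode=1,slave=0,slave=1,primary=0,socket_id=0'
 */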