net/bonding: add dedicated HW queues for LACP control
dpdk.git: drivers/net/bonding/rte_eth_bond_pmd.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_ethdev_vdev.h>
40 #include <rte_tcp.h>
41 #include <rte_udp.h>
42 #include <rte_ip.h>
43 #include <rte_ip_frag.h>
44 #include <rte_devargs.h>
45 #include <rte_kvargs.h>
46 #include <rte_vdev.h>
47 #include <rte_alarm.h>
48 #include <rte_cycles.h>
49
50 #include "rte_eth_bond.h"
51 #include "rte_eth_bond_private.h"
52 #include "rte_eth_bond_8023ad_private.h"
53
54 #define REORDER_PERIOD_MS 10
55 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
56
57 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
58
59 /* Table for statistics in mode 5 TLB */
60 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
61
62 static inline size_t
63 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
64 {
65         size_t vlan_offset = 0;
66
67         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
68                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
69
70                 vlan_offset = sizeof(struct vlan_hdr);
71                 *proto = vlan_hdr->eth_proto;
72
73                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
74                         vlan_hdr = vlan_hdr + 1;
75                         *proto = vlan_hdr->eth_proto;
76                         vlan_offset += sizeof(struct vlan_hdr);
77                 }
78         }
79         return vlan_offset;
80 }
81
82 static uint16_t
83 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
84 {
85         struct bond_dev_private *internals;
86
87         uint16_t num_rx_slave = 0;
88         uint16_t num_rx_total = 0;
89
90         int i;
91
92         /* Cast to structure containing the bonded device's port id and queue id */
93         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
94
95         internals = bd_rx_q->dev_private;
96
97
98         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
99                 /* Offset of pointer to *bufs increases as packets are received
100                  * from other slaves */
101                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
102                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
103                 if (num_rx_slave) {
104                         num_rx_total += num_rx_slave;
105                         nb_pkts -= num_rx_slave;
106                 }
107         }
108
109         return num_rx_total;
110 }
111
112 static uint16_t
113 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
114                 uint16_t nb_pkts)
115 {
116         struct bond_dev_private *internals;
117
118         /* Cast to structure containing the bonded device's port id and queue id */
119         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
120
121         internals = bd_rx_q->dev_private;
122
123         return rte_eth_rx_burst(internals->current_primary_port,
124                         bd_rx_q->queue_id, bufs, nb_pkts);
125 }
126
127 static inline uint8_t
128 is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
129 {
130         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
131
132         return !vlan_tci && (ethertype == ether_type_slow_be &&
133                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
134 }
135
136 /*****************************************************************************
137  * Flow director's setup for mode 4 optimization
138  */
139
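/*
 * The Ethernet item below matches only on EtherType 0x8809 (slow protocols),
 * with a full mask on the type field and wildcarded MAC addresses, so that
 * LACP and marker frames can be isolated by the NIC's flow engine.
 */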
140 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
141         .dst.addr_bytes = { 0 },
142         .src.addr_bytes = { 0 },
143         .type = RTE_BE16(ETHER_TYPE_SLOW),
144 };
145
146 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
147         .dst.addr_bytes = { 0 },
148         .src.addr_bytes = { 0 },
149         .type = 0xFFFF,
150 };
151
152 static struct rte_flow_item flow_item_8023ad[] = {
153         {
154                 .type = RTE_FLOW_ITEM_TYPE_ETH,
155                 .spec = &flow_item_eth_type_8023ad,
156                 .last = NULL,
157                 .mask = &flow_item_eth_mask_type_8023ad,
158         },
159         {
160                 .type = RTE_FLOW_ITEM_TYPE_END,
161                 .spec = NULL,
162                 .last = NULL,
163                 .mask = NULL,
164         }
165 };
166
167 const struct rte_flow_attr flow_attr_8023ad = {
168         .group = 0,
169         .priority = 0,
170         .ingress = 1,
171         .egress = 0,
172         .reserved = 0,
173 };
174
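/*
 * Dry-run the slow-protocol flow rule on a slave port: rte_flow_validate()
 * checks that the port could redirect EtherType 0x8809 traffic to the
 * dedicated LACP Rx queue, without actually creating the rule.
 */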
175 int
176 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
177                 uint8_t slave_port) {
178         struct rte_flow_error error;
179         struct bond_dev_private *internals = (struct bond_dev_private *)
180                         (bond_dev->data->dev_private);
181
182         struct rte_flow_action_queue lacp_queue_conf = {
183                 .index = internals->mode4.dedicated_queues.rx_qid,
184         };
185
186         const struct rte_flow_action actions[] = {
187                 {
188                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
189                         .conf = &lacp_queue_conf
190                 },
191                 {
192                         .type = RTE_FLOW_ACTION_TYPE_END,
193                 }
194         };
195
196         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
197                         flow_item_8023ad, actions, &error);
198         if (ret < 0)
199                 return -1;
200
201         return 0;
202 }
203
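/*
 * Check whether every slave currently in the bond can steer slow protocol
 * frames in hardware. The dedicated queue ids are taken as the next queue
 * index after those already configured on the bonded device.
 */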
204 int
205 bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
206         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
207         struct bond_dev_private *internals = (struct bond_dev_private *)
208                         (bond_dev->data->dev_private);
209         struct rte_eth_dev_info bond_info, slave_info;
210         uint8_t idx;
211
212         /* Verify that all slaves in the bonding device support flow director */
213         if (internals->slave_count > 0) {
214                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
215
216                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
217                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
218
219                 for (idx = 0; idx < internals->slave_count; idx++) {
220                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
221                                         &slave_info);
222
223                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
224                                         internals->slaves[idx].port_id) != 0)
225                                 return -1;
226                 }
227         }
228
229         return 0;
230 }
231
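/*
 * Create the actual flow rule on a slave port: slow protocol frames received
 * on the slave are queued to the dedicated LACP Rx queue. The flow handle is
 * stored per slave port so the rule can be destroyed later.
 */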
232 int
233 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {
234
235         struct rte_flow_error error;
236         struct bond_dev_private *internals = (struct bond_dev_private *)
237                         (bond_dev->data->dev_private);
238
239         struct rte_flow_action_queue lacp_queue_conf = {
240                 .index = internals->mode4.dedicated_queues.rx_qid,
241         };
242
243         const struct rte_flow_action actions[] = {
244                 {
245                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
246                         .conf = &lacp_queue_conf
247                 },
248                 {
249                         .type = RTE_FLOW_ACTION_TYPE_END,
250                 }
251         };
252
253         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
254                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
255         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
256                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
257                                 "(slave_port=%d queue_id=%d)",
258                                 error.message, slave_port,
259                                 internals->mode4.dedicated_queues.rx_qid);
260                 return -1;
261         }
262
263         return 0;
264 }
265
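/*
 * Rx burst function used when dedicated HW queues are enabled: LACP and
 * marker frames never reach the data queues (they are steered away by the
 * flow rule above), so packets can simply be collected round-robin from the
 * active slaves without any software filtering.
 */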
266 static uint16_t
267 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
268                 uint16_t nb_pkts)
269 {
270         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
271         struct bond_dev_private *internals = bd_rx_q->dev_private;
272         uint16_t num_rx_total = 0;      /* Total number of received packets */
273         uint8_t slaves[RTE_MAX_ETHPORTS];
274         uint8_t slave_count;
275
276         uint8_t i, idx;
277
278         /* Copy slave list to protect against slave up/down changes during rx
279          * bursting */
280         slave_count = internals->active_slave_count;
281         memcpy(slaves, internals->active_slaves,
282                         sizeof(internals->active_slaves[0]) * slave_count);
283
284         for (i = 0, idx = internals->active_slave;
285                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
286                 idx = idx % slave_count;
287
288                 /* Read packets from this slave */
289                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
290                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
291         }
292
293         internals->active_slave = idx;
294
295         return num_rx_total;
296 }
297
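/*
 * Tx burst function used when dedicated HW queues are enabled: data traffic
 * is hashed only across slaves that are currently distributing; slow frames
 * are expected to be transmitted separately on the dedicated Tx queue.
 */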
298 static uint16_t
299 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
300                 uint16_t nb_pkts)
301 {
302         struct bond_dev_private *internals;
303         struct bond_tx_queue *bd_tx_q;
304
305         uint8_t num_of_slaves;
306         uint8_t slaves[RTE_MAX_ETHPORTS];
307          /* positions in slaves, not ID */
308         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
309         uint8_t distributing_count;
310
311         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
312         uint16_t i, op_slave_idx;
313
314         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
315
316         /* Total amount of packets in slave_bufs */
317         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
318
319
320         if (unlikely(nb_pkts == 0))
321                 return 0;
322
323         bd_tx_q = (struct bond_tx_queue *)queue;
324         internals = bd_tx_q->dev_private;
325
326         /* Copy slave list to protect against slave up/down changes during tx
327          * bursting */
328         num_of_slaves = internals->active_slave_count;
329         if (num_of_slaves < 1)
330                 return num_tx_total;
331
332         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
333                         num_of_slaves);
334
335         distributing_count = 0;
336         for (i = 0; i < num_of_slaves; i++) {
337                 struct port *port = &mode_8023ad_ports[slaves[i]];
338                 if (ACTOR_STATE(port, DISTRIBUTING))
339                         distributing_offsets[distributing_count++] = i;
340         }
341
342         if (likely(distributing_count > 0)) {
343                 /* Populate slave mbuf arrays with the packets to be sent */
344                 for (i = 0; i < nb_pkts; i++) {
345                         /* Select output slave using hash based on xmit policy */
346                         op_slave_idx = internals->xmit_hash(bufs[i],
347                                         distributing_count);
348
349                         /* Populate slave mbuf arrays with mbufs for that slave.
350                          * Use only slaves that are currently distributing.
351                          */
352                         uint8_t slave_offset =
353                                         distributing_offsets[op_slave_idx];
354                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
355                                         bufs[i];
356                         slave_nb_pkts[slave_offset]++;
357                 }
358         }
359
360         /* Send packet burst on each slave device */
361         for (i = 0; i < num_of_slaves; i++) {
362                 if (slave_nb_pkts[i] == 0)
363                         continue;
364
365                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
366                                 slave_bufs[i], slave_nb_pkts[i]);
367
368                 num_tx_total += num_tx_slave;
369                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
370
371                 /* If tx burst fails move packets to end of bufs */
372                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
373                         uint16_t j = nb_pkts - num_tx_fail_total;
374                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
375                                         num_tx_slave++)
376                                 bufs[j] = slave_bufs[i][num_tx_slave];
377                 }
378         }
379
380         return num_tx_total;
381 }
382
383
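/*
 * Default mode 4 Rx burst (no dedicated queues): slow protocol frames arrive
 * mixed with data traffic, so each received burst is scanned and LACP/marker
 * frames are filtered out in software and handed to the mode 4 state machine.
 */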
384 static uint16_t
385 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
386                 uint16_t nb_pkts)
387 {
388         /* Cast to structure containing the bonded device's port id and queue id */
389         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
390         struct bond_dev_private *internals = bd_rx_q->dev_private;
391         struct ether_addr bond_mac;
392
393         struct ether_hdr *hdr;
394
395         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
396         uint16_t num_rx_total = 0;      /* Total number of received packets */
397         uint8_t slaves[RTE_MAX_ETHPORTS];
398         uint8_t slave_count, idx;
399
400         uint8_t collecting;  /* current slave collecting status */
401         const uint8_t promisc = internals->promiscuous_en;
402         uint8_t i, j, k;
403         uint8_t subtype;
404
405         rte_eth_macaddr_get(internals->port_id, &bond_mac);
406         /* Copy slave list to protect against slave up/down changes during rx
407          * bursting */
408         slave_count = internals->active_slave_count;
409         memcpy(slaves, internals->active_slaves,
410                         sizeof(internals->active_slaves[0]) * slave_count);
411
412         idx = internals->active_slave;
413         if (idx >= slave_count) {
414                 internals->active_slave = 0;
415                 idx = 0;
416         }
417         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
418                 j = num_rx_total;
419                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
420                                          COLLECTING);
421
422                 /* Read packets from this slave */
423                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
424                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
425
426                 for (k = j; k < 2 && k < num_rx_total; k++)
427                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
428
429                 /* Handle slow protocol packets. */
430                 while (j < num_rx_total) {
431
432                         /* Packet type is known and not pure L2, so it cannot be a slow frame; skip it */
433                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
434                                 j++;
435                                 continue;
436                         }
437
438                         if (j + 3 < num_rx_total)
439                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
440
441                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
442                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
443
444                         /* Remove the packet from the array if it is a slow packet, the
445                          * slave is not in collecting state, or the bonding interface is
446                          * not in promiscuous mode and the destination MAC does not match. */
447                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
448                                 !collecting || (!promisc &&
449                                         !is_multicast_ether_addr(&hdr->d_addr) &&
450                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
451
452                                 if (hdr->ether_type == ether_type_slow_be) {
453                                         bond_mode_8023ad_handle_slow_pkt(
454                                             internals, slaves[idx], bufs[j]);
455                                 } else
456                                         rte_pktmbuf_free(bufs[j]);
457
458                                 /* Packet is managed by mode 4 or dropped, shift the array */
459                                 num_rx_total--;
460                                 if (j < num_rx_total) {
461                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
462                                                 (num_rx_total - j));
463                                 }
464                         } else
465                                 j++;
466                 }
467                 if (unlikely(++idx == slave_count))
468                         idx = 0;
469         }
470
471         internals->active_slave = idx;
472         return num_rx_total;
473 }
474
475 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
476 uint32_t burstnumberRX;
477 uint32_t burstnumberTX;
478
479 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
480
481 static void
482 arp_op_name(uint16_t arp_op, char *buf)
483 {
484         switch (arp_op) {
485         case ARP_OP_REQUEST:
486                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
487                 return;
488         case ARP_OP_REPLY:
489                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
490                 return;
491         case ARP_OP_REVREQUEST:
492                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
493                                 "Reverse ARP Request");
494                 return;
495         case ARP_OP_REVREPLY:
496                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
497                                 "Reverse ARP Reply");
498                 return;
499         case ARP_OP_INVREQUEST:
500                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
501                                 "Peer Identify Request");
502                 return;
503         case ARP_OP_INVREPLY:
504                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
505                                 "Peer Identify Reply");
506                 return;
507         default:
508                 break;
509         }
510         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
511         return;
512 }
513 #endif
514 #define MaxIPv4String   16
515 static void
516 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
517 {
518         uint32_t ipv4_addr;
519
520         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
521         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
522                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
523                 ipv4_addr & 0xFF);
524 }
525
526 #define MAX_CLIENTS_NUMBER      128
527 uint8_t active_clients;
528 struct client_stats_t {
529         uint8_t port;
530         uint32_t ipv4_addr;
531         uint32_t ipv4_rx_packets;
532         uint32_t ipv4_tx_packets;
533 };
534 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
535
536 static void
537 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
538 {
539         int i = 0;
540
541         for (; i < MAX_CLIENTS_NUMBER; i++)     {
542                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
543                         /* Existing client: just update its RX or TX packet count */
544                         if (TXorRXindicator == &burstnumberRX)
545                                 client_stats[i].ipv4_rx_packets++;
546                         else
547                                 client_stats[i].ipv4_tx_packets++;
548                         return;
549                 }
550         }
551         /* We have a new client. Insert it into the table and update its stats */
552         if (TXorRXindicator == &burstnumberRX)
553                 client_stats[active_clients].ipv4_rx_packets++;
554         else
555                 client_stats[active_clients].ipv4_tx_packets++;
556         client_stats[active_clients].ipv4_addr = addr;
557         client_stats[active_clients].port = port;
558         active_clients++;
559
560 }
561
562 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
563 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
564                 RTE_LOG(DEBUG, PMD, \
565                 "%s " \
566                 "port:%d " \
567                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
568                 "SrcIP:%s " \
569                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
570                 "DstIP:%s " \
571                 "%s " \
572                 "%d\n", \
573                 info, \
574                 port, \
575                 eth_h->s_addr.addr_bytes[0], \
576                 eth_h->s_addr.addr_bytes[1], \
577                 eth_h->s_addr.addr_bytes[2], \
578                 eth_h->s_addr.addr_bytes[3], \
579                 eth_h->s_addr.addr_bytes[4], \
580                 eth_h->s_addr.addr_bytes[5], \
581                 src_ip, \
582                 eth_h->d_addr.addr_bytes[0], \
583                 eth_h->d_addr.addr_bytes[1], \
584                 eth_h->d_addr.addr_bytes[2], \
585                 eth_h->d_addr.addr_bytes[3], \
586                 eth_h->d_addr.addr_bytes[4], \
587                 eth_h->d_addr.addr_bytes[5], \
588                 dst_ip, \
589                 arp_op, \
590                 ++burstnumber)
591 #endif
592
593 static void
594 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
595                 uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
596 {
597         struct ipv4_hdr *ipv4_h;
598 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
599         struct arp_hdr *arp_h;
600         char dst_ip[16];
601         char ArpOp[24];
602         char buf[16];
603 #endif
604         char src_ip[16];
605
606         uint16_t ether_type = eth_h->ether_type;
607         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
608
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610         snprintf(buf, 16, "%s", info);
611 #endif
612
613         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
614                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
615                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
616 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
617                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
618                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
619 #endif
620                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
621         }
622 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
623         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
624                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
625                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
626                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
627                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
628                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
629         }
630 #endif
631 }
632 #endif
633
634 static uint16_t
635 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
636 {
637         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
638         struct bond_dev_private *internals = bd_tx_q->dev_private;
639         struct ether_hdr *eth_h;
640         uint16_t ether_type, offset;
641         uint16_t nb_recv_pkts;
642         int i;
643
644         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
645
646         for (i = 0; i < nb_recv_pkts; i++) {
647                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
648                 ether_type = eth_h->ether_type;
649                 offset = get_vlan_offset(eth_h, &ether_type);
650
651                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
652 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
653                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
654 #endif
655                         bond_mode_alb_arp_recv(eth_h, offset, internals);
656                 }
657 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
658                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
659                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
660 #endif
661         }
662
663         return nb_recv_pkts;
664 }
665
666 static uint16_t
667 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
668                 uint16_t nb_pkts)
669 {
670         struct bond_dev_private *internals;
671         struct bond_tx_queue *bd_tx_q;
672
673         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
674         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
675
676         uint8_t num_of_slaves;
677         uint8_t slaves[RTE_MAX_ETHPORTS];
678
679         uint16_t num_tx_total = 0, num_tx_slave;
680
681         static int slave_idx = 0;
682         int i, cslave_idx = 0, tx_fail_total = 0;
683
684         bd_tx_q = (struct bond_tx_queue *)queue;
685         internals = bd_tx_q->dev_private;
686
687         /* Copy slave list to protect against slave up/down changes during tx
688          * bursting */
689         num_of_slaves = internals->active_slave_count;
690         memcpy(slaves, internals->active_slaves,
691                         sizeof(internals->active_slaves[0]) * num_of_slaves);
692
693         if (num_of_slaves < 1)
694                 return num_tx_total;
695
696         /* Populate slave mbuf arrays with the packets to be sent on each slave */
697         for (i = 0; i < nb_pkts; i++) {
698                 cslave_idx = (slave_idx + i) % num_of_slaves;
699                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
700         }
701
702         /* increment current slave index so the next call to tx burst starts on the
703          * next slave */
704         slave_idx = ++cslave_idx;
705
706         /* Send packet burst on each slave device */
707         for (i = 0; i < num_of_slaves; i++) {
708                 if (slave_nb_pkts[i] > 0) {
709                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
710                                         slave_bufs[i], slave_nb_pkts[i]);
711
712                         /* if tx burst fails move packets to end of bufs */
713                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
714                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
715
716                                 tx_fail_total += tx_fail_slave;
717
718                                 memcpy(&bufs[nb_pkts - tx_fail_total],
719                                                 &slave_bufs[i][num_tx_slave],
720                                                 tx_fail_slave * sizeof(bufs[0]));
721                         }
722                         num_tx_total += num_tx_slave;
723                 }
724         }
725
726         return num_tx_total;
727 }
728
729 static uint16_t
730 bond_ethdev_tx_burst_active_backup(void *queue,
731                 struct rte_mbuf **bufs, uint16_t nb_pkts)
732 {
733         struct bond_dev_private *internals;
734         struct bond_tx_queue *bd_tx_q;
735
736         bd_tx_q = (struct bond_tx_queue *)queue;
737         internals = bd_tx_q->dev_private;
738
739         if (internals->active_slave_count < 1)
740                 return 0;
741
742         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
743                         bufs, nb_pkts);
744 }
745
746 static inline uint16_t
747 ether_hash(struct ether_hdr *eth_hdr)
748 {
749         unaligned_uint16_t *word_src_addr =
750                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
751         unaligned_uint16_t *word_dst_addr =
752                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
753
754         return (word_src_addr[0] ^ word_dst_addr[0]) ^
755                         (word_src_addr[1] ^ word_dst_addr[1]) ^
756                         (word_src_addr[2] ^ word_dst_addr[2]);
757 }
758
759 static inline uint32_t
760 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
761 {
762         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
763 }
764
765 static inline uint32_t
766 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
767 {
768         unaligned_uint32_t *word_src_addr =
769                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
770         unaligned_uint32_t *word_dst_addr =
771                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
772
773         return (word_src_addr[0] ^ word_dst_addr[0]) ^
774                         (word_src_addr[1] ^ word_dst_addr[1]) ^
775                         (word_src_addr[2] ^ word_dst_addr[2]) ^
776                         (word_src_addr[3] ^ word_dst_addr[3]);
777 }
778
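/*
 * Transmit hash policies (l2, l23, l34): fold the relevant header fields
 * into a hash and take it modulo the slave count to pick an egress slave.
 */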
779 uint16_t
780 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
781 {
782         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
783
784         uint32_t hash = ether_hash(eth_hdr);
785
786         return (hash ^= hash >> 8) % slave_count;
787 }
788
789 uint16_t
790 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
791 {
792         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
793         uint16_t proto = eth_hdr->ether_type;
794         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
795         uint32_t hash, l3hash = 0;
796
797         hash = ether_hash(eth_hdr);
798
799         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
800                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
801                                 ((char *)(eth_hdr + 1) + vlan_offset);
802                 l3hash = ipv4_hash(ipv4_hdr);
803
804         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
805                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
806                                 ((char *)(eth_hdr + 1) + vlan_offset);
807                 l3hash = ipv6_hash(ipv6_hdr);
808         }
809
810         hash = hash ^ l3hash;
811         hash ^= hash >> 16;
812         hash ^= hash >> 8;
813
814         return hash % slave_count;
815 }
816
817 uint16_t
818 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
819 {
820         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
821         uint16_t proto = eth_hdr->ether_type;
822         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
823
824         struct udp_hdr *udp_hdr = NULL;
825         struct tcp_hdr *tcp_hdr = NULL;
826         uint32_t hash, l3hash = 0, l4hash = 0;
827
828         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
829                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
830                                 ((char *)(eth_hdr + 1) + vlan_offset);
831                 size_t ip_hdr_offset;
832
833                 l3hash = ipv4_hash(ipv4_hdr);
834
835                 /* there is no L4 header in fragmented packet */
836                 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
837                         ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
838                                         IPV4_IHL_MULTIPLIER;
839
840                         if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
841                                 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
842                                                 ip_hdr_offset);
843                                 l4hash = HASH_L4_PORTS(tcp_hdr);
844                         } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
845                                 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
846                                                 ip_hdr_offset);
847                                 l4hash = HASH_L4_PORTS(udp_hdr);
848                         }
849                 }
850         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
851                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
852                                 ((char *)(eth_hdr + 1) + vlan_offset);
853                 l3hash = ipv6_hash(ipv6_hdr);
854
855                 if (ipv6_hdr->proto == IPPROTO_TCP) {
856                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
857                         l4hash = HASH_L4_PORTS(tcp_hdr);
858                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
859                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
860                         l4hash = HASH_L4_PORTS(udp_hdr);
861                 }
862         }
863
864         hash = l3hash ^ l4hash;
865         hash ^= hash >> 16;
866         hash ^= hash >> 8;
867
868         return hash % slave_count;
869 }
870
871 struct bwg_slave {
872         uint64_t bwg_left_int;
873         uint64_t bwg_left_remainder;
874         uint8_t slave;
875 };
876
877 void
878 bond_tlb_activate_slave(struct bond_dev_private *internals) {
879         int i;
880
881         for (i = 0; i < internals->active_slave_count; i++) {
882                 tlb_last_obytets[internals->active_slaves[i]] = 0;
883         }
884 }
885
886 static int
887 bandwidth_cmp(const void *a, const void *b)
888 {
889         const struct bwg_slave *bwg_a = a;
890         const struct bwg_slave *bwg_b = b;
891         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
892         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
893                         (int64_t)bwg_a->bwg_left_remainder;
894         if (diff > 0)
895                 return 1;
896         else if (diff < 0)
897                 return -1;
898         else if (diff2 > 0)
899                 return 1;
900         else if (diff2 < 0)
901                 return -1;
902         else
903                 return 0;
904 }
905
906 static void
907 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
908                 struct bwg_slave *bwg_slave)
909 {
910         struct rte_eth_link link_status;
911
912         rte_eth_link_get_nowait(port_id, &link_status);
913         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
914         if (link_bwg == 0)
915                 return;
916         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
917         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
918         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
919 }
920
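/*
 * Periodic alarm callback for TLB (mode 5): estimate each active slave's
 * remaining bandwidth from its link speed and transmitted byte count, then
 * reorder tlb_slaves_order so that tx bursts favour the least loaded slave.
 */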
921 static void
922 bond_ethdev_update_tlb_slave_cb(void *arg)
923 {
924         struct bond_dev_private *internals = arg;
925         struct rte_eth_stats slave_stats;
926         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
927         uint8_t slave_count;
928         uint64_t tx_bytes;
929
930         uint8_t update_stats = 0;
931         uint8_t i, slave_id;
932
933         internals->slave_update_idx++;
934
935
936         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
937                 update_stats = 1;
938
939         for (i = 0; i < internals->active_slave_count; i++) {
940                 slave_id = internals->active_slaves[i];
941                 rte_eth_stats_get(slave_id, &slave_stats);
942                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
943                 bandwidth_left(slave_id, tx_bytes,
944                                 internals->slave_update_idx, &bwg_array[i]);
945                 bwg_array[i].slave = slave_id;
946
947                 if (update_stats) {
948                         tlb_last_obytets[slave_id] = slave_stats.obytes;
949                 }
950         }
951
952         if (update_stats == 1)
953                 internals->slave_update_idx = 0;
954
955         slave_count = i;
956         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
957         for (i = 0; i < slave_count; i++)
958                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
959
960         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
961                         (struct bond_dev_private *)internals);
962 }
963
964 static uint16_t
965 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
966 {
967         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
968         struct bond_dev_private *internals = bd_tx_q->dev_private;
969
970         struct rte_eth_dev *primary_port =
971                         &rte_eth_devices[internals->primary_port];
972         uint16_t num_tx_total = 0;
973         uint8_t i, j;
974
975         uint8_t num_of_slaves = internals->active_slave_count;
976         uint8_t slaves[RTE_MAX_ETHPORTS];
977
978         struct ether_hdr *ether_hdr;
979         struct ether_addr primary_slave_addr;
980         struct ether_addr active_slave_addr;
981
982         if (num_of_slaves < 1)
983                 return num_tx_total;
984
985         memcpy(slaves, internals->tlb_slaves_order,
986                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
987
988
989         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
990
991         if (nb_pkts > 3) {
992                 for (i = 0; i < 3; i++)
993                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
994         }
995
996         for (i = 0; i < num_of_slaves; i++) {
997                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
998                 for (j = num_tx_total; j < nb_pkts; j++) {
999                         if (j + 3 < nb_pkts)
1000                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1001
1002                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1003                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1004                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1005 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1006                                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1007 #endif
1008                 }
1009
1010                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1011                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1012
1013                 if (num_tx_total == nb_pkts)
1014                         break;
1015         }
1016
1017         return num_tx_total;
1018 }
1019
1020 void
1021 bond_tlb_disable(struct bond_dev_private *internals)
1022 {
1023         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1024 }
1025
1026 void
1027 bond_tlb_enable(struct bond_dev_private *internals)
1028 {
1029         bond_ethdev_update_tlb_slave_cb(internals);
1030 }
1031
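/*
 * Tx burst for adaptive load balancing (mode 6): ARP packets are assigned to
 * slaves by the ALB table (with the source MAC rewritten to the chosen
 * slave), ARP update packets are generated for known clients when needed,
 * and all remaining traffic falls back to the TLB transmit policy.
 */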
1032 static uint16_t
1033 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1034 {
1035         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1036         struct bond_dev_private *internals = bd_tx_q->dev_private;
1037
1038         struct ether_hdr *eth_h;
1039         uint16_t ether_type, offset;
1040
1041         struct client_data *client_info;
1042
1043         /*
1044          * We create transmit buffers for every slave and one additional to send
1045          * through tlb. In the worst case every packet will be sent on one port.
1046          */
1047         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1048         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1049
1050         /*
1051          * We create separate transmit buffers for update packets as they won't
1052          * be counted in num_tx_total.
1053          */
1054         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1055         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1056
1057         struct rte_mbuf *upd_pkt;
1058         size_t pkt_size;
1059
1060         uint16_t num_send, num_not_send = 0;
1061         uint16_t num_tx_total = 0;
1062         uint8_t slave_idx;
1063
1064         int i, j;
1065
1066         /* Search tx buffer for ARP packets and forward them to alb */
1067         for (i = 0; i < nb_pkts; i++) {
1068                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1069                 ether_type = eth_h->ether_type;
1070                 offset = get_vlan_offset(eth_h, &ether_type);
1071
1072                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1073                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1074
1075                         /* Change src mac in eth header */
1076                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1077
1078                         /* Add packet to slave tx buffer */
1079                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1080                         slave_bufs_pkts[slave_idx]++;
1081                 } else {
1082                         /* If packet is not ARP, send it with TLB policy */
1083                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1084                                         bufs[i];
1085                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1086                 }
1087         }
1088
1089         /* Update connected client ARP tables */
1090         if (internals->mode6.ntt) {
1091                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1092                         client_info = &internals->mode6.client_table[i];
1093
1094                         if (client_info->in_use) {
1095                                 /* Allocate new packet to send ARP update on current slave */
1096                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1097                                 if (upd_pkt == NULL) {
1098                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1099                                         continue;
1100                                 }
1101                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1102                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1103                                 upd_pkt->data_len = pkt_size;
1104                                 upd_pkt->pkt_len = pkt_size;
1105
1106                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1107                                                 internals);
1108
1109                                 /* Add packet to update tx buffer */
1110                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1111                                 update_bufs_pkts[slave_idx]++;
1112                         }
1113                 }
1114                 internals->mode6.ntt = 0;
1115         }
1116
1117         /* Send ARP packets on proper slaves */
1118         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1119                 if (slave_bufs_pkts[i] > 0) {
1120                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1121                                         slave_bufs[i], slave_bufs_pkts[i]);
1122                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1123                                 bufs[nb_pkts - 1 - num_not_send - j] =
1124                                                 slave_bufs[i][nb_pkts - 1 - j];
1125                         }
1126
1127                         num_tx_total += num_send;
1128                         num_not_send += slave_bufs_pkts[i] - num_send;
1129
1130 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1131         /* Print TX stats including update packets */
1132                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1133                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1134                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1135                         }
1136 #endif
1137                 }
1138         }
1139
1140         /* Send update packets on proper slaves */
1141         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1142                 if (update_bufs_pkts[i] > 0) {
1143                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1144                                         update_bufs_pkts[i]);
1145                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1146                                 rte_pktmbuf_free(update_bufs[i][j]);
1147                         }
1148 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1149                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1150                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1151                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1152                         }
1153 #endif
1154                 }
1155         }
1156
1157         /* Send non-ARP packets using tlb policy */
1158         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1159                 num_send = bond_ethdev_tx_burst_tlb(queue,
1160                                 slave_bufs[RTE_MAX_ETHPORTS],
1161                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1162
1163                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1164                         bufs[nb_pkts - 1 - num_not_send - j] =
1165                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1166                 }
1167
1168                 num_tx_total += num_send;
1169         }
1170
1171         return num_tx_total;
1172 }
1173
1174 static uint16_t
1175 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1176                 uint16_t nb_pkts)
1177 {
1178         struct bond_dev_private *internals;
1179         struct bond_tx_queue *bd_tx_q;
1180
1181         uint8_t num_of_slaves;
1182         uint8_t slaves[RTE_MAX_ETHPORTS];
1183
1184         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
1185
1186         int i, op_slave_id;
1187
1188         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1189         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1190
1191         bd_tx_q = (struct bond_tx_queue *)queue;
1192         internals = bd_tx_q->dev_private;
1193
1194         /* Copy slave list to protect against slave up/down changes during tx
1195          * bursting */
1196         num_of_slaves = internals->active_slave_count;
1197         memcpy(slaves, internals->active_slaves,
1198                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1199
1200         if (num_of_slaves < 1)
1201                 return num_tx_total;
1202
1203         /* Populate slave mbuf arrays with the packets to be sent on each slave */
1204         for (i = 0; i < nb_pkts; i++) {
1205                 /* Select output slave using hash based on xmit policy */
1206                 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
1207
1208                 /* Populate slave mbuf arrays with mbufs for that slave */
1209                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
1210         }
1211
1212         /* Send packet burst on each slave device */
1213         for (i = 0; i < num_of_slaves; i++) {
1214                 if (slave_nb_pkts[i] > 0) {
1215                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1216                                         slave_bufs[i], slave_nb_pkts[i]);
1217
1218                         /* if tx burst fails move packets to end of bufs */
1219                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1220                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
1221
1222                                 tx_fail_total += slave_tx_fail_count;
1223                                 memcpy(&bufs[nb_pkts - tx_fail_total],
1224                                                 &slave_bufs[i][num_tx_slave],
1225                                                 slave_tx_fail_count * sizeof(bufs[0]));
1226                         }
1227
1228                         num_tx_total += num_tx_slave;
1229                 }
1230         }
1231
1232         return num_tx_total;
1233 }
1234
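/*
 * Default mode 4 Tx burst (no dedicated queues): LACP frames queued by the
 * state machine on each slave's tx_ring are dequeued here and sent ahead of
 * the hashed data traffic on the same Tx queue.
 */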
1235 static uint16_t
1236 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1237                 uint16_t nb_pkts)
1238 {
1239         struct bond_dev_private *internals;
1240         struct bond_tx_queue *bd_tx_q;
1241
1242         uint8_t num_of_slaves;
1243         uint8_t slaves[RTE_MAX_ETHPORTS];
1244          /* positions in slaves, not ID */
1245         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1246         uint8_t distributing_count;
1247
1248         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1249         uint16_t i, j, op_slave_idx;
1250         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
1251
1252         /* Allocate room for additional slow (LACP) packets in 802.3AD mode. */
1253         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
1254         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
1255
1256         /* Total amount of packets in slave_bufs */
1257         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1258         /* Slow packets placed in each slave */
1259         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1260
1261         bd_tx_q = (struct bond_tx_queue *)queue;
1262         internals = bd_tx_q->dev_private;
1263
1264         /* Copy slave list to protect against slave up/down changes during tx
1265          * bursting */
1266         num_of_slaves = internals->active_slave_count;
1267         if (num_of_slaves < 1)
1268                 return num_tx_total;
1269
1270         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1271
1272         distributing_count = 0;
1273         for (i = 0; i < num_of_slaves; i++) {
1274                 struct port *port = &mode_8023ad_ports[slaves[i]];
1275
1276                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1277                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
1278                                 NULL);
1279                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1280
1281                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1282                         slave_bufs[i][j] = slow_pkts[j];
1283
1284                 if (ACTOR_STATE(port, DISTRIBUTING))
1285                         distributing_offsets[distributing_count++] = i;
1286         }
1287
1288         if (likely(distributing_count > 0)) {
1289                 /* Populate slave mbuf arrays with the packets to be sent on each slave */
1290                 for (i = 0; i < nb_pkts; i++) {
1291                         /* Select output slave using hash based on xmit policy */
1292                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1293
1294                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
1295                          * slaves that are currently distributing. */
1296                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
1297                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1298                         slave_nb_pkts[slave_offset]++;
1299                 }
1300         }
1301
1302         /* Send packet burst on each slave device */
1303         for (i = 0; i < num_of_slaves; i++) {
1304                 if (slave_nb_pkts[i] == 0)
1305                         continue;
1306
1307                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1308                                 slave_bufs[i], slave_nb_pkts[i]);
1309
1310                 /* If tx burst fails drop slow packets */
1311                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1312                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1313
1314                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1315                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1316
1317                 /* If tx burst fails move packets to end of bufs */
1318                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1319                         uint16_t j = nb_pkts - num_tx_fail_total;
1320                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1321                                 bufs[j] = slave_bufs[i][num_tx_slave];
1322                 }
1323         }
1324
1325         return num_tx_total;
1326 }
1327
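/*
 * Broadcast mode Tx burst: each packet's reference count is bumped and the
 * burst is transmitted on every active slave; on partial failures only the
 * most successful slave's count is reported and the remaining copies freed.
 */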
1328 static uint16_t
1329 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1330                 uint16_t nb_pkts)
1331 {
1332         struct bond_dev_private *internals;
1333         struct bond_tx_queue *bd_tx_q;
1334
1335         uint8_t tx_failed_flag = 0, num_of_slaves;
1336         uint8_t slaves[RTE_MAX_ETHPORTS];
1337
1338         uint16_t max_nb_of_tx_pkts = 0;
1339
1340         int slave_tx_total[RTE_MAX_ETHPORTS];
1341         int i, most_successful_tx_slave = -1;
1342
1343         bd_tx_q = (struct bond_tx_queue *)queue;
1344         internals = bd_tx_q->dev_private;
1345
1346         /* Copy slave list to protect against slave up/down changes during tx
1347          * bursting */
1348         num_of_slaves = internals->active_slave_count;
1349         memcpy(slaves, internals->active_slaves,
1350                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1351
1352         if (num_of_slaves < 1)
1353                 return 0;
1354
1355         /* Increment reference count on mbufs */
1356         for (i = 0; i < nb_pkts; i++)
1357                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1358
1359         /* Transmit burst on each active slave */
1360         for (i = 0; i < num_of_slaves; i++) {
1361                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1362                                         bufs, nb_pkts);
1363
1364                 if (unlikely(slave_tx_total[i] < nb_pkts))
1365                         tx_failed_flag = 1;
1366
1367                 /* record the value and slave index for the slave which transmits the
1368                  * maximum number of packets */
1369                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1370                         max_nb_of_tx_pkts = slave_tx_total[i];
1371                         most_successful_tx_slave = i;
1372                 }
1373         }
1374
1375         /* if slaves fail to transmit packets from burst, the calling application
1376          * is not expected to know about multiple references to packets so we must
1377          * handle failures of all packets except those of the most successful slave
1378          */
1379         if (unlikely(tx_failed_flag))
1380                 for (i = 0; i < num_of_slaves; i++)
1381                         if (i != most_successful_tx_slave)
1382                                 while (slave_tx_total[i] < nb_pkts)
1383                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1384
1385         return max_nb_of_tx_pkts;
1386 }
1387
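/*
 * Link property helpers: the bonded device inherits speed and duplex from
 * the first active slave, and link_properties_valid() is later used to
 * reject slaves whose link settings do not match the bonded device.
 */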
1388 void
1389 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1390                 struct rte_eth_link *slave_dev_link)
1391 {
1392         struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1393         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1394
1395         if (slave_dev_link->link_status &&
1396                 bonded_eth_dev->data->dev_started) {
1397                 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1398                 bonded_dev_link->link_speed = slave_dev_link->link_speed;
1399
1400                 internals->link_props_set = 1;
1401         }
1402 }
1403
1404 void
1405 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1406 {
1407         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1408
1409         memset(&(bonded_eth_dev->data->dev_link), 0,
1410                         sizeof(bonded_eth_dev->data->dev_link));
1411
1412         internals->link_props_set = 0;
1413 }
1414
1415 int
1416 link_properties_valid(struct rte_eth_link *bonded_dev_link,
1417                 struct rte_eth_link *slave_dev_link)
1418 {
1419         if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1420                 bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
1421                 return -1;
1422
1423         return 0;
1424 }
1425
1426 int
1427 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1428 {
1429         struct ether_addr *mac_addr;
1430
1431         if (eth_dev == NULL) {
1432                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1433                 return -1;
1434         }
1435
1436         if (dst_mac_addr == NULL) {
1437                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1438                 return -1;
1439         }
1440
1441         mac_addr = eth_dev->data->mac_addrs;
1442
1443         ether_addr_copy(mac_addr, dst_mac_addr);
1444         return 0;
1445 }
1446
1447 int
1448 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1449 {
1450         struct ether_addr *mac_addr;
1451
1452         if (eth_dev == NULL) {
1453                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1454                 return -1;
1455         }
1456
1457         if (new_mac_addr == NULL) {
1458                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1459                 return -1;
1460         }
1461
1462         mac_addr = eth_dev->data->mac_addrs;
1463
1464         /* If the new MAC is different from the current MAC then update it */
1465         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1466                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1467
1468         return 0;
1469 }
1470
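/*
 * Propagate MAC addresses to the slaves according to the bonding mode:
 * round-robin, balance and broadcast give every slave the bonded MAC;
 * 802.3ad delegates to the mode 4 state machine; the remaining modes set
 * the bonded MAC only on the current primary and restore each other
 * slave's persisted (original) MAC.
 */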
1471 int
1472 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1473 {
1474         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1475         int i;
1476
1477         /* Update slave devices MAC addresses */
1478         if (internals->slave_count < 1)
1479                 return -1;
1480
1481         switch (internals->mode) {
1482         case BONDING_MODE_ROUND_ROBIN:
1483         case BONDING_MODE_BALANCE:
1484         case BONDING_MODE_BROADCAST:
1485                 for (i = 0; i < internals->slave_count; i++) {
1486                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1487                                         bonded_eth_dev->data->mac_addrs)) {
1488                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1489                                                 internals->slaves[i].port_id);
1490                                 return -1;
1491                         }
1492                 }
1493                 break;
1494         case BONDING_MODE_8023AD:
1495                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1496                 break;
1497         case BONDING_MODE_ACTIVE_BACKUP:
1498         case BONDING_MODE_TLB:
1499         case BONDING_MODE_ALB:
1500         default:
1501                 for (i = 0; i < internals->slave_count; i++) {
1502                         if (internals->slaves[i].port_id ==
1503                                         internals->current_primary_port) {
1504                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1505                                                 bonded_eth_dev->data->mac_addrs)) {
1506                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1507                                                         internals->current_primary_port);
1508                                         return -1;
1509                                 }
1510                         } else {
1511                                 if (mac_address_set(
1512                                                 &rte_eth_devices[internals->slaves[i].port_id],
1513                                                 &internals->slaves[i].persisted_mac_addr)) {
1514                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1515                                                         internals->slaves[i].port_id);
1516                                         return -1;
1517                                 }
1518                         }
1519                 }
1520         }
1521
1522         return 0;
1523 }
1524
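/*
 * Select the RX/TX burst handlers for the requested bonding mode.  For
 * mode 4 (802.3ad) there are two data paths: the default software path,
 * which must be polled at least every 100 ms so LACPDUs can be extracted
 * from the regular bursts, and the "fast queue" path used when dedicated
 * HW queues are enabled, where slow traffic is steered to its own queue
 * pair and the data path is left untouched.
 *
 * Illustrative application-side sketch (assumes a bonded port already
 * created in mode 4 and the dedicated-queue helper exported by the
 * 802.3ad API, rte_eth_bond_8023ad_dedicated_queues_enable()):
 *
 *     if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id) != 0)
 *         rte_exit(EXIT_FAILURE, "dedicated queues not supported\n");
 *     rte_eth_dev_configure(bond_port_id, nb_rxq, nb_txq, &port_conf);
 *     // set up queues and call rte_eth_dev_start() as usual
 */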
1525 int
1526 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1527 {
1528         struct bond_dev_private *internals;
1529
1530         internals = eth_dev->data->dev_private;
1531
1532         switch (mode) {
1533         case BONDING_MODE_ROUND_ROBIN:
1534                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1535                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1536                 break;
1537         case BONDING_MODE_ACTIVE_BACKUP:
1538                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1539                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1540                 break;
1541         case BONDING_MODE_BALANCE:
1542                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1543                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1544                 break;
1545         case BONDING_MODE_BROADCAST:
1546                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1547                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1548                 break;
1549         case BONDING_MODE_8023AD:
1550                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1551                         return -1;
1552
1553                 if (internals->mode4.dedicated_queues.enabled == 0) {
1554                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1555                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1556                         RTE_LOG(WARNING, PMD,
1557                                 "Using mode 4, it is necessary to do TX burst "
1558                                 "and RX burst at least every 100ms.\n");
1559                 } else {
1560                         /* Use flow director's optimization */
1561                         eth_dev->rx_pkt_burst =
1562                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1563                         eth_dev->tx_pkt_burst =
1564                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1565                 }
1566                 break;
1567         case BONDING_MODE_TLB:
1568                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1569                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1570                 break;
1571         case BONDING_MODE_ALB:
1572                 if (bond_mode_alb_enable(eth_dev) != 0)
1573                         return -1;
1574
1575                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1576                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1577                 break;
1578         default:
1579                 return -1;
1580         }
1581
1582         internals->mode = mode;
1583
1584         return 0;
1585 }
1586
1587
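/*
 * Per-slave setup for the LACP slow path: allocate a mempool for LACPDU
 * mbufs and, when dedicated queues are enabled, configure one extra RX
 * queue and one extra TX queue (rx_qid/tx_qid) on the slave, beyond the
 * queues used for data traffic.
 */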
1588 static int
1589 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1590                 struct rte_eth_dev *slave_eth_dev)
1591 {
1592         int errval = 0;
1593         struct bond_dev_private *internals = (struct bond_dev_private *)
1594                 bonded_eth_dev->data->dev_private;
1595         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1596
1597         if (port->slow_pool == NULL) {
1598                 char mem_name[256];
1599                 int slave_id = slave_eth_dev->data->port_id;
1600
1601                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1602                                 slave_id);
1603                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1604                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1605                         slave_eth_dev->data->numa_node);
1606
1607                 /* Any memory allocation failure during initialization is critical because
1608                  * the resources cannot be freed, so reinitialization is impossible. */
1609                 if (port->slow_pool == NULL) {
1610                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1611                                 slave_id, mem_name, rte_strerror(rte_errno));
1612                 }
1613         }
1614
1615         if (internals->mode4.dedicated_queues.enabled == 1) {
1616                 /* Configure slow Rx queue */
1617
1618                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1619                                 internals->mode4.dedicated_queues.rx_qid, 128,
1620                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1621                                 NULL, port->slow_pool);
1622                 if (errval != 0) {
1623                         RTE_BOND_LOG(ERR,
1624                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1625                                         slave_eth_dev->data->port_id,
1626                                         internals->mode4.dedicated_queues.rx_qid,
1627                                         errval);
1628                         return errval;
1629                 }
1630
1631                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1632                                 internals->mode4.dedicated_queues.tx_qid, 512,
1633                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1634                                 NULL);
1635                 if (errval != 0) {
1636                         RTE_BOND_LOG(ERR,
1637                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1638                                 slave_eth_dev->data->port_id,
1639                                 internals->mode4.dedicated_queues.tx_qid,
1640                                 errval);
1641                         return errval;
1642                 }
1643         }
1644         return 0;
1645 }
1646
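/*
 * (Re)configure a slave to mirror the bonded device: stop it, copy the
 * RSS and VLAN settings, configure it with one extra RX/TX queue when
 * mode 4 dedicated queues are enabled, replicate every bonded queue, set
 * up the slow queues and the flow rule that steers LACP control frames to
 * the dedicated RX queue, then start the slave and resync its RETA.
 */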
1647 int
1648 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1649                 struct rte_eth_dev *slave_eth_dev)
1650 {
1651         struct bond_rx_queue *bd_rx_q;
1652         struct bond_tx_queue *bd_tx_q;
1653         uint16_t nb_rx_queues;
1654         uint16_t nb_tx_queues;
1655
1656         int errval;
1657         uint16_t q_id;
1658         struct rte_flow_error flow_error;
1659
1660         struct bond_dev_private *internals = (struct bond_dev_private *)
1661                 bonded_eth_dev->data->dev_private;
1662
1663         /* Stop slave */
1664         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1665
1666         /* Enable interrupts on slave device if supported */
1667         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1668                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1669
1670         /* If RSS is enabled for bonding, try to enable it for slaves  */
1671         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1672                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1673                                 != 0) {
1674                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1675                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1676                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1677                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1678                 } else {
1679                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1680                 }
1681
1682                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1683                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1684                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1685                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1686         }
1687
1688         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1689                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1690
1691         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1692         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1693
1694         if (internals->mode == BONDING_MODE_8023AD) {
1695                 if (internals->mode4.dedicated_queues.enabled == 1) {
1696                         nb_rx_queues++;
1697                         nb_tx_queues++;
1698                 }
1699         }
1700
1701         /* Configure device */
1702         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1703                         nb_rx_queues, nb_tx_queues,
1704                         &(slave_eth_dev->data->dev_conf));
1705         if (errval != 0) {
1706                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1707                                 slave_eth_dev->data->port_id, errval);
1708                 return errval;
1709         }
1710
1711         /* Setup Rx Queues */
1712         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1713                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1714
1715                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1716                                 bd_rx_q->nb_rx_desc,
1717                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1718                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1719                 if (errval != 0) {
1720                         RTE_BOND_LOG(ERR,
1721                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1722                                         slave_eth_dev->data->port_id, q_id, errval);
1723                         return errval;
1724                 }
1725         }
1726
1727         /* Setup Tx Queues */
1728         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1729                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1730
1731                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1732                                 bd_tx_q->nb_tx_desc,
1733                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1734                                 &bd_tx_q->tx_conf);
1735                 if (errval != 0) {
1736                         RTE_BOND_LOG(ERR,
1737                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1738                                 slave_eth_dev->data->port_id, q_id, errval);
1739                         return errval;
1740                 }
1741         }
1742
1743         if (internals->mode == BONDING_MODE_8023AD &&
1744                         internals->mode4.dedicated_queues.enabled == 1) {
1745                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1746                 if (errval != 0)
1747                         return errval;
1748
1749                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1750                                 slave_eth_dev->data->port_id) != 0) {
1751                         RTE_BOND_LOG(ERR,
1752                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1753                                 slave_eth_dev->data->port_id);
1754                         return -1;
1755                 }
1756
1757                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1758                         rte_flow_destroy(slave_eth_dev->data->port_id,
1759                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1760                                         &flow_error);
1761
1762                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1763                                 slave_eth_dev->data->port_id);
1764         }
1765
1766         /* Start device */
1767         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1768         if (errval != 0) {
1769                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1770                                 slave_eth_dev->data->port_id, errval);
1771                 return -1;
1772         }
1773
1774         /* If RSS is enabled for bonding, synchronize RETA */
1775         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1776                 int i;
1777                 struct bond_dev_private *internals;
1778
1779                 internals = bonded_eth_dev->data->dev_private;
1780
1781                 for (i = 0; i < internals->slave_count; i++) {
1782                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1783                                 errval = rte_eth_dev_rss_reta_update(
1784                                                 slave_eth_dev->data->port_id,
1785                                                 &internals->reta_conf[0],
1786                                                 internals->slaves[i].reta_size);
1787                                 if (errval != 0) {
1788                                         RTE_LOG(WARNING, PMD,
1789                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1790                                                         " RSS Configuration for bonding may be inconsistent.\n",
1791                                                         slave_eth_dev->data->port_id, errval);
1792                                 }
1793                                 break;
1794                         }
1795                 }
1796         }
1797
1798         /* If lsc interrupt is set, check initial slave's link status */
1799         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1800                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1801                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1802                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1803                         NULL);
1804         }
1805
1806         return 0;
1807 }
1808
1809 void
1810 slave_remove(struct bond_dev_private *internals,
1811                 struct rte_eth_dev *slave_eth_dev)
1812 {
1813         uint8_t i;
1814
1815         for (i = 0; i < internals->slave_count; i++)
1816                 if (internals->slaves[i].port_id ==
1817                                 slave_eth_dev->data->port_id)
1818                         break;
1819
1820         if (i < (internals->slave_count - 1))
1821                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1822                                 sizeof(internals->slaves[0]) *
1823                                 (internals->slave_count - i - 1));
1824
1825         internals->slave_count--;
1826
1827         /* force reconfiguration of slave interfaces */
1828         _rte_eth_dev_reset(slave_eth_dev);
1829 }
1830
1831 static void
1832 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1833
1834 void
1835 slave_add(struct bond_dev_private *internals,
1836                 struct rte_eth_dev *slave_eth_dev)
1837 {
1838         struct bond_slave_details *slave_details =
1839                         &internals->slaves[internals->slave_count];
1840
1841         slave_details->port_id = slave_eth_dev->data->port_id;
1842         slave_details->last_link_status = 0;
1843
1844         /* Mark slave devices that don't support interrupts so we can
1845          * compensate when we start the bond
1846          */
1847         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1848                 slave_details->link_status_poll_enabled = 1;
1849         }
1850
1851         slave_details->link_status_wait_to_complete = 0;
1852         /* Save the slave's original MAC so it can be restored when it leaves the bond */
1853         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1854                         sizeof(struct ether_addr));
1855 }
1856
1857 void
1858 bond_ethdev_primary_set(struct bond_dev_private *internals,
1859                 uint8_t slave_port_id)
1860 {
1861         int i;
1862
1863         if (internals->active_slave_count < 1)
1864                 internals->current_primary_port = slave_port_id;
1865         else
1866                 /* Search bonded device slave ports for new proposed primary port */
1867                 for (i = 0; i < internals->active_slave_count; i++) {
1868                         if (internals->active_slaves[i] == slave_port_id)
1869                                 internals->current_primary_port = slave_port_id;
1870                 }
1871 }
1872
1873 static void
1874 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1875
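/*
 * Starting the bonded device: derive the bonded MAC from the primary
 * slave when the user has not set one, push MACs and promiscuous state to
 * the slaves, and in mode 4 with dedicated queues assign rx_qid/tx_qid to
 * the first queue indices past the application's data queues before each
 * slave is reconfigured.  Link-status polling is armed for slaves that
 * cannot deliver LSC interrupts.
 */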
1876 static int
1877 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1878 {
1879         struct bond_dev_private *internals;
1880         int i;
1881
1882         /* slave eth dev will be started by bonded device */
1883         if (check_for_bonded_ethdev(eth_dev)) {
1884                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1885                                 eth_dev->data->port_id);
1886                 return -1;
1887         }
1888
1889         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1890         eth_dev->data->dev_started = 1;
1891
1892         internals = eth_dev->data->dev_private;
1893
1894         if (internals->slave_count == 0) {
1895                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1896                 return -1;
1897         }
1898
1899         if (internals->user_defined_mac == 0) {
1900                 struct ether_addr *new_mac_addr = NULL;
1901
1902                 for (i = 0; i < internals->slave_count; i++)
1903                         if (internals->slaves[i].port_id == internals->primary_port)
1904                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1905
1906                 if (new_mac_addr == NULL)
1907                         return -1;
1908
1909                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1910                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1911                                         eth_dev->data->port_id);
1912                         return -1;
1913                 }
1914         }
1915
1916         /* Update all slave devices' MAC addresses */
1917         if (mac_address_slaves_update(eth_dev) != 0)
1918                 return -1;
1919
1920         /* If bonded device is configured in promiscuous mode then re-apply config */
1921         if (internals->promiscuous_en)
1922                 bond_ethdev_promiscuous_enable(eth_dev);
1923
1924         if (internals->mode == BONDING_MODE_8023AD) {
1925                 if (internals->mode4.dedicated_queues.enabled == 1) {
1926                         internals->mode4.dedicated_queues.rx_qid =
1927                                         eth_dev->data->nb_rx_queues;
1928                         internals->mode4.dedicated_queues.tx_qid =
1929                                         eth_dev->data->nb_tx_queues;
1930                 }
1931         }
1932
1933
1934         /* Reconfigure each slave device if starting bonded device */
1935         for (i = 0; i < internals->slave_count; i++) {
1936                 struct rte_eth_dev *slave_ethdev =
1937                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1938                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1939                         RTE_BOND_LOG(ERR,
1940                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1941                                 eth_dev->data->port_id,
1942                                 internals->slaves[i].port_id);
1943                         return -1;
1944                 }
1945                 /* We will need to poll for link status if any slave doesn't
1946                  * support interrupts
1947                  */
1948                 if (internals->slaves[i].link_status_poll_enabled)
1949                         internals->link_status_polling_enabled = 1;
1950         }
1951         /* start polling if needed */
1952         if (internals->link_status_polling_enabled) {
1953                 rte_eal_alarm_set(
1954                         internals->link_status_polling_interval_ms * 1000,
1955                         bond_ethdev_slave_link_status_change_monitor,
1956                         (void *)&rte_eth_devices[internals->port_id]);
1957         }
1958
1959         if (internals->user_defined_primary_port)
1960                 bond_ethdev_primary_set(internals, internals->primary_port);
1961
1962         if (internals->mode == BONDING_MODE_8023AD)
1963                 bond_mode_8023ad_start(eth_dev);
1964
1965         if (internals->mode == BONDING_MODE_TLB ||
1966                         internals->mode == BONDING_MODE_ALB)
1967                 bond_tlb_enable(internals);
1968
1969         return 0;
1970 }
1971
1972 static void
1973 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1974 {
1975         uint8_t i;
1976
1977         if (dev->data->rx_queues != NULL) {
1978                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1979                         rte_free(dev->data->rx_queues[i]);
1980                         dev->data->rx_queues[i] = NULL;
1981                 }
1982                 dev->data->nb_rx_queues = 0;
1983         }
1984
1985         if (dev->data->tx_queues != NULL) {
1986                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1987                         rte_free(dev->data->tx_queues[i]);
1988                         dev->data->tx_queues[i] = NULL;
1989                 }
1990                 dev->data->nb_tx_queues = 0;
1991         }
1992 }
1993
1994 void
1995 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1996 {
1997         struct bond_dev_private *internals = eth_dev->data->dev_private;
1998         uint8_t i;
1999
2000         if (internals->mode == BONDING_MODE_8023AD) {
2001                 struct port *port;
2002                 void *pkt = NULL;
2003
2004                 bond_mode_8023ad_stop(eth_dev);
2005
2006                 /* Discard all messages to/from mode 4 state machines */
2007                 for (i = 0; i < internals->active_slave_count; i++) {
2008                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2009
2010                         RTE_ASSERT(port->rx_ring != NULL);
2011                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2012                                 rte_pktmbuf_free(pkt);
2013
2014                         RTE_ASSERT(port->tx_ring != NULL);
2015                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2016                                 rte_pktmbuf_free(pkt);
2017                 }
2018         }
2019
2020         if (internals->mode == BONDING_MODE_TLB ||
2021                         internals->mode == BONDING_MODE_ALB) {
2022                 bond_tlb_disable(internals);
2023                 for (i = 0; i < internals->active_slave_count; i++)
2024                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2025         }
2026
2027         internals->active_slave_count = 0;
2028         internals->link_status_polling_enabled = 0;
2029         for (i = 0; i < internals->slave_count; i++)
2030                 internals->slaves[i].last_link_status = 0;
2031
2032         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2033         eth_dev->data->dev_started = 0;
2034 }
2035
2036 void
2037 bond_ethdev_close(struct rte_eth_dev *dev)
2038 {
2039         struct bond_dev_private *internals = dev->data->dev_private;
2040         uint8_t bond_port_id = internals->port_id;
2041         int skipped = 0;
2042
2043         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2044         while (internals->slave_count != skipped) {
2045                 uint8_t port_id = internals->slaves[skipped].port_id;
2046
2047                 rte_eth_dev_stop(port_id);
2048
2049                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2050                         RTE_LOG(ERR, PMD,
2051                                 "Failed to remove port %d from bonded device "
2052                                 "%s\n", port_id, dev->device->name);
2053                         skipped++;
2054                 }
2055         }
2056         bond_ethdev_free_queues(dev);
2057         rte_bitmap_reset(internals->vlan_filter_bmp);
2058 }
2059
2060 /* forward declaration */
2061 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2062
2063 static void
2064 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2065 {
2066         struct bond_dev_private *internals = dev->data->dev_private;
2067
2068         uint16_t max_nb_rx_queues = UINT16_MAX;
2069         uint16_t max_nb_tx_queues = UINT16_MAX;
2070
2071         dev_info->max_mac_addrs = 1;
2072
2073         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2074                         internals->candidate_max_rx_pktlen :
2075                         ETHER_MAX_JUMBO_FRAME_LEN;
2076
2077         /* The maximum number of tx/rx queues that the bonded device can support
2078          * is the minimum of the values reported by the bonded slaves, as all
2079          * slaves must be capable of supporting the same number of tx/rx queues.
2080          */
2081         if (internals->slave_count > 0) {
2082                 struct rte_eth_dev_info slave_info;
2083                 uint8_t idx;
2084
2085                 for (idx = 0; idx < internals->slave_count; idx++) {
2086                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2087                                         &slave_info);
2088
2089                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2090                                 max_nb_rx_queues = slave_info.max_rx_queues;
2091
2092                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2093                                 max_nb_tx_queues = slave_info.max_tx_queues;
2094                 }
2095         }
2096
2097         dev_info->max_rx_queues = max_nb_rx_queues;
2098         dev_info->max_tx_queues = max_nb_tx_queues;
2099
2100         /* If dedicated HW queues are enabled for the link bonding device in
2101          * LACP mode then the maximum number of data path queues must be
2102          * reduced by one, since one queue pair per slave is reserved for LACP.
2103          */
2104         if (internals->mode == BONDING_MODE_8023AD &&
2105                 internals->mode4.dedicated_queues.enabled == 1) {
2106                 dev_info->max_rx_queues--;
2107                 dev_info->max_tx_queues--;
2108         }
2109
2110         dev_info->min_rx_bufsize = 0;
2111
2112         dev_info->rx_offload_capa = internals->rx_offload_capa;
2113         dev_info->tx_offload_capa = internals->tx_offload_capa;
2114         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2115
2116         dev_info->reta_size = internals->reta_size;
2117 }
2118
2119 static int
2120 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2121 {
2122         int res;
2123         uint8_t i;
2124         struct bond_dev_private *internals = dev->data->dev_private;
2125
2126         /* don't do this while a slave is being added */
2127         rte_spinlock_lock(&internals->lock);
2128
2129         if (on)
2130                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2131         else
2132                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2133
2134         for (i = 0; i < internals->slave_count; i++) {
2135                 uint8_t port_id = internals->slaves[i].port_id;
2136
2137                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2138                 if (res == -ENOTSUP)
2139                         RTE_LOG(WARNING, PMD,
2140                                 "Setting VLAN filter on slave port %u not supported.\n",
2141                                 port_id);
2142         }
2143
2144         rte_spinlock_unlock(&internals->lock);
2145         return 0;
2146 }
2147
2148 static int
2149 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2150                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2151                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2152 {
2153         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2154                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2155                                         0, dev->data->numa_node);
2156         if (bd_rx_q == NULL)
2157                 return -1;
2158
2159         bd_rx_q->queue_id = rx_queue_id;
2160         bd_rx_q->dev_private = dev->data->dev_private;
2161
2162         bd_rx_q->nb_rx_desc = nb_rx_desc;
2163
2164         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2165         bd_rx_q->mb_pool = mb_pool;
2166
2167         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2168
2169         return 0;
2170 }
2171
2172 static int
2173 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2174                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2175                 const struct rte_eth_txconf *tx_conf)
2176 {
2177         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2178                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2179                                         0, dev->data->numa_node);
2180
2181         if (bd_tx_q == NULL)
2182                 return -1;
2183
2184         bd_tx_q->queue_id = tx_queue_id;
2185         bd_tx_q->dev_private = dev->data->dev_private;
2186
2187         bd_tx_q->nb_tx_desc = nb_tx_desc;
2188         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2189
2190         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2191
2192         return 0;
2193 }
2194
2195 static void
2196 bond_ethdev_rx_queue_release(void *queue)
2197 {
2198         if (queue == NULL)
2199                 return;
2200
2201         rte_free(queue);
2202 }
2203
2204 static void
2205 bond_ethdev_tx_queue_release(void *queue)
2206 {
2207         if (queue == NULL)
2208                 return;
2209
2210         rte_free(queue);
2211 }
2212
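/*
 * Periodic alarm callback that polls the link status of slaves which do
 * not support LSC interrupts and feeds any change into the normal LSC
 * event path.  It re-arms itself for as long as at least one polled slave
 * exists and the bonded device remains started.
 */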
2213 static void
2214 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2215 {
2216         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2217         struct bond_dev_private *internals;
2218
2219         /* Default value for polling slave found is true as we don't want to
2220          * disable the polling thread if we cannot get the lock */
2221         int i, polling_slave_found = 1;
2222
2223         if (cb_arg == NULL)
2224                 return;
2225
2226         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2227         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2228
2229         if (!bonded_ethdev->data->dev_started ||
2230                 !internals->link_status_polling_enabled)
2231                 return;
2232
2233         /* If device is currently being configured then don't check slaves link
2234          * status, wait until next period */
2235         if (rte_spinlock_trylock(&internals->lock)) {
2236                 if (internals->slave_count > 0)
2237                         polling_slave_found = 0;
2238
2239                 for (i = 0; i < internals->slave_count; i++) {
2240                         if (!internals->slaves[i].link_status_poll_enabled)
2241                                 continue;
2242
2243                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2244                         polling_slave_found = 1;
2245
2246                         /* Update slave link status */
2247                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2248                                         internals->slaves[i].link_status_wait_to_complete);
2249
2250                         /* if link status has changed since last checked then call lsc
2251                          * event callback */
2252                         if (slave_ethdev->data->dev_link.link_status !=
2253                                         internals->slaves[i].last_link_status) {
2254                                 internals->slaves[i].last_link_status =
2255                                                 slave_ethdev->data->dev_link.link_status;
2256
2257                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2258                                                 RTE_ETH_EVENT_INTR_LSC,
2259                                                 &bonded_ethdev->data->port_id,
2260                                                 NULL);
2261                         }
2262                 }
2263                 rte_spinlock_unlock(&internals->lock);
2264         }
2265
2266         if (polling_slave_found)
2267                 /* Set alarm to continue monitoring link status of slave ethdev's */
2268                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2269                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2270 }
2271
2272 static int
2273 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
2274                 int wait_to_complete)
2275 {
2276         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
2277
2278         if (!bonded_eth_dev->data->dev_started ||
2279                 internals->active_slave_count == 0) {
2280                 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2281                 return 0;
2282         } else {
2283                 struct rte_eth_dev *slave_eth_dev;
2284                 int i, link_up = 0;
2285
2286                 for (i = 0; i < internals->active_slave_count; i++) {
2287                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
2288
2289                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
2290                                         wait_to_complete);
2291                         if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2292                                 link_up = 1;
2293                                 break;
2294                         }
2295                 }
2296
2297                 bonded_eth_dev->data->dev_link.link_status = link_up;
2298         }
2299
2300         return 0;
2301 }
2302
2303 static void
2304 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2305 {
2306         struct bond_dev_private *internals = dev->data->dev_private;
2307         struct rte_eth_stats slave_stats;
2308         int i, j;
2309
2310         for (i = 0; i < internals->slave_count; i++) {
2311                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2312
2313                 stats->ipackets += slave_stats.ipackets;
2314                 stats->opackets += slave_stats.opackets;
2315                 stats->ibytes += slave_stats.ibytes;
2316                 stats->obytes += slave_stats.obytes;
2317                 stats->imissed += slave_stats.imissed;
2318                 stats->ierrors += slave_stats.ierrors;
2319                 stats->oerrors += slave_stats.oerrors;
2320                 stats->rx_nombuf += slave_stats.rx_nombuf;
2321
2322                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2323                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2324                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2325                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2326                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2327                         stats->q_errors[j] += slave_stats.q_errors[j];
2328                 }
2329
2330         }
2331 }
2332
2333 static void
2334 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2335 {
2336         struct bond_dev_private *internals = dev->data->dev_private;
2337         int i;
2338
2339         for (i = 0; i < internals->slave_count; i++)
2340                 rte_eth_stats_reset(internals->slaves[i].port_id);
2341 }
2342
2343 static void
2344 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2345 {
2346         struct bond_dev_private *internals = eth_dev->data->dev_private;
2347         int i;
2348
2349         internals->promiscuous_en = 1;
2350
2351         switch (internals->mode) {
2352         /* Promiscuous mode is propagated to all slaves */
2353         case BONDING_MODE_ROUND_ROBIN:
2354         case BONDING_MODE_BALANCE:
2355         case BONDING_MODE_BROADCAST:
2356                 for (i = 0; i < internals->slave_count; i++)
2357                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2358                 break;
2359         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2360         case BONDING_MODE_8023AD:
2361                 break;
2362         /* Promiscuous mode is propagated only to primary slave */
2363         case BONDING_MODE_ACTIVE_BACKUP:
2364         case BONDING_MODE_TLB:
2365         case BONDING_MODE_ALB:
2366         default:
2367                 rte_eth_promiscuous_enable(internals->current_primary_port);
2368         }
2369 }
2370
2371 static void
2372 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2373 {
2374         struct bond_dev_private *internals = dev->data->dev_private;
2375         int i;
2376
2377         internals->promiscuous_en = 0;
2378
2379         switch (internals->mode) {
2380         /* Promiscuous mode is propagated to all slaves */
2381         case BONDING_MODE_ROUND_ROBIN:
2382         case BONDING_MODE_BALANCE:
2383         case BONDING_MODE_BROADCAST:
2384                 for (i = 0; i < internals->slave_count; i++)
2385                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2386                 break;
2387         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2388         case BONDING_MODE_8023AD:
2389                 break;
2390         /* Promiscuous mode is propagated only to primary slave */
2391         case BONDING_MODE_ACTIVE_BACKUP:
2392         case BONDING_MODE_TLB:
2393         case BONDING_MODE_ALB:
2394         default:
2395                 rte_eth_promiscuous_disable(internals->current_primary_port);
2396         }
2397 }
2398
2399 static void
2400 bond_ethdev_delayed_lsc_propagation(void *arg)
2401 {
2402         if (arg == NULL)
2403                 return;
2404
2405         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2406                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2407 }
2408
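/*
 * LSC event handler for slave ports.  The first slave to come up becomes
 * the primary and the bonded device inherits its link properties; later
 * slaves must match that speed/duplex or they are rejected.  When the
 * last active slave goes down the bonded link is reported down and the
 * properties are reset.  Propagation of the event to the application can
 * be deferred via the configured link up/down delays.
 */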
2409 int
2410 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
2411                 void *param, void *ret_param __rte_unused)
2412 {
2413         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
2414         struct bond_dev_private *internals;
2415         struct rte_eth_link link;
2416         int rc = -1;
2417
2418         int i, valid_slave = 0;
2419         uint8_t active_pos;
2420         uint8_t lsc_flag = 0;
2421
2422         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2423                 return rc;
2424
2425         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2426         slave_eth_dev = &rte_eth_devices[port_id];
2427
2428         if (check_for_bonded_ethdev(bonded_eth_dev))
2429                 return rc;
2430
2431         internals = bonded_eth_dev->data->dev_private;
2432
2433         /* If the device isn't started don't handle interrupts */
2434         if (!bonded_eth_dev->data->dev_started)
2435                 return rc;
2436
2437         /* verify that port_id is a valid slave of bonded port */
2438         for (i = 0; i < internals->slave_count; i++) {
2439                 if (internals->slaves[i].port_id == port_id) {
2440                         valid_slave = 1;
2441                         break;
2442                 }
2443         }
2444
2445         if (!valid_slave)
2446                 return rc;
2447
2448         /* Search for port in active port list */
2449         active_pos = find_slave_by_id(internals->active_slaves,
2450                         internals->active_slave_count, port_id);
2451
2452         rte_eth_link_get_nowait(port_id, &link);
2453         if (link.link_status) {
2454                 if (active_pos < internals->active_slave_count)
2455                         return rc;
2456
2457                 /* if no active slave ports then set this port to be primary port */
2458                 if (internals->active_slave_count < 1) {
2459                         /* If first active slave, then change link status */
2460                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2461                         internals->current_primary_port = port_id;
2462                         lsc_flag = 1;
2463
2464                         mac_address_slaves_update(bonded_eth_dev);
2465
2466                         /* Inherit eth dev link properties from first active slave */
2467                         link_properties_set(bonded_eth_dev,
2468                                         &(slave_eth_dev->data->dev_link));
2469                 } else {
2470                         if (link_properties_valid(
2471                                 &bonded_eth_dev->data->dev_link, &link) != 0) {
2472                                 slave_eth_dev->data->dev_flags &=
2473                                         (~RTE_ETH_DEV_BONDED_SLAVE);
2474                                 RTE_LOG(ERR, PMD,
2475                                         "port %u invalid speed/duplex\n",
2476                                         port_id);
2477                                 return rc;
2478                         }
2479                 }
2480
2481                 activate_slave(bonded_eth_dev, port_id);
2482
2483                 /* If user has defined the primary port then default to using it */
2484                 if (internals->user_defined_primary_port &&
2485                                 internals->primary_port == port_id)
2486                         bond_ethdev_primary_set(internals, port_id);
2487         } else {
2488                 if (active_pos == internals->active_slave_count)
2489                         return rc;
2490
2491                 /* Remove from active slave list */
2492                 deactivate_slave(bonded_eth_dev, port_id);
2493
2494                 /* No active slaves, change link status to down and reset other
2495                  * link properties */
2496                 if (internals->active_slave_count < 1) {
2497                         lsc_flag = 1;
2498                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2499
2500                         link_properties_reset(bonded_eth_dev);
2501                 }
2502
2503                 /* Update primary id: take the first active slave from the list, or
2504                  * fall back to the configured primary port if none are active */
2505                 if (port_id == internals->current_primary_port) {
2506                         if (internals->active_slave_count > 0)
2507                                 bond_ethdev_primary_set(internals,
2508                                                 internals->active_slaves[0]);
2509                         else
2510                                 internals->current_primary_port = internals->primary_port;
2511                 }
2512         }
2513
2514         if (lsc_flag) {
2515                 /* Cancel any possible outstanding interrupts if delays are enabled */
2516                 if (internals->link_up_delay_ms > 0 ||
2517                         internals->link_down_delay_ms > 0)
2518                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2519                                         bonded_eth_dev);
2520
2521                 if (bonded_eth_dev->data->dev_link.link_status) {
2522                         if (internals->link_up_delay_ms > 0)
2523                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2524                                                 bond_ethdev_delayed_lsc_propagation,
2525                                                 (void *)bonded_eth_dev);
2526                         else
2527                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2528                                                 RTE_ETH_EVENT_INTR_LSC,
2529                                                 NULL, NULL);
2530
2531                 } else {
2532                         if (internals->link_down_delay_ms > 0)
2533                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2534                                                 bond_ethdev_delayed_lsc_propagation,
2535                                                 (void *)bonded_eth_dev);
2536                         else
2537                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2538                                                 RTE_ETH_EVENT_INTR_LSC,
2539                                                 NULL, NULL);
2540                 }
2541         }
2542         return 0;
2543 }
2544
2545 static int
2546 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2547                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2548 {
2549         unsigned i, j;
2550         int result = 0;
2551         int slave_reta_size;
2552         unsigned reta_count;
2553         struct bond_dev_private *internals = dev->data->dev_private;
2554
2555         if (reta_size != internals->reta_size)
2556                 return -EINVAL;
2557
2558          /* Copy RETA table */
2559         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2560
2561         for (i = 0; i < reta_count; i++) {
2562                 internals->reta_conf[i].mask = reta_conf[i].mask;
2563                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2564                         if ((reta_conf[i].mask >> j) & 0x01)
2565                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2566         }
2567
2568         /* Fill rest of array */
2569         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2570                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2571                                 sizeof(internals->reta_conf[0]) * reta_count);
2572
2573         /* Propagate RETA over slaves */
2574         for (i = 0; i < internals->slave_count; i++) {
2575                 slave_reta_size = internals->slaves[i].reta_size;
2576                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2577                                 &internals->reta_conf[0], slave_reta_size);
2578                 if (result < 0)
2579                         return result;
2580         }
2581
2582         return 0;
2583 }
2584
2585 static int
2586 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2587                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2588 {
2589         int i, j;
2590         struct bond_dev_private *internals = dev->data->dev_private;
2591
2592         if (reta_size != internals->reta_size)
2593                 return -EINVAL;
2594
2595          /* Copy RETA table */
2596         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2597                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2598                         if ((reta_conf[i].mask >> j) & 0x01)
2599                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2600
2601         return 0;
2602 }
2603
2604 static int
2605 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2606                 struct rte_eth_rss_conf *rss_conf)
2607 {
2608         int i, result = 0;
2609         struct bond_dev_private *internals = dev->data->dev_private;
2610         struct rte_eth_rss_conf bond_rss_conf;
2611
2612         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2613
2614         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2615
2616         if (bond_rss_conf.rss_hf != 0)
2617                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2618
2619         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2620                         sizeof(internals->rss_key)) {
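                /* A key length of zero falls back to the default 40-byte RSS key */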
2621                 if (bond_rss_conf.rss_key_len == 0)
2622                         bond_rss_conf.rss_key_len = 40;
2623                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2624                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2625                                 internals->rss_key_len);
2626         }
2627
2628         for (i = 0; i < internals->slave_count; i++) {
2629                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2630                                 &bond_rss_conf);
2631                 if (result < 0)
2632                         return result;
2633         }
2634
2635         return 0;
2636 }
2637
2638 static int
2639 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2640                 struct rte_eth_rss_conf *rss_conf)
2641 {
2642         struct bond_dev_private *internals = dev->data->dev_private;
2643
2644         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2645         rss_conf->rss_key_len = internals->rss_key_len;
2646         if (rss_conf->rss_key)
2647                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2648
2649         return 0;
2650 }
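/*
 * Illustrative sketch, not part of the driver: bond_ethdev_rss_hash_update()
 * silently trims the requested hash functions to the intersection of what
 * every slave supports (internals->flow_type_rss_offloads), so an application
 * that cares about the outcome should read the configuration back.  The
 * bond_port_id variable is assumed to already hold the bonded port id.
 *
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = NULL,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf);
 *	rte_eth_dev_rss_hash_conf_get(bond_port_id, &rss_conf);
 *	printf("hash functions in use: 0x%" PRIx64 "\n", rss_conf.rss_hf);
 */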
2651
2652 const struct eth_dev_ops default_dev_ops = {
2653         .dev_start            = bond_ethdev_start,
2654         .dev_stop             = bond_ethdev_stop,
2655         .dev_close            = bond_ethdev_close,
2656         .dev_configure        = bond_ethdev_configure,
2657         .dev_infos_get        = bond_ethdev_info,
2658         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2659         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2660         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2661         .rx_queue_release     = bond_ethdev_rx_queue_release,
2662         .tx_queue_release     = bond_ethdev_tx_queue_release,
2663         .link_update          = bond_ethdev_link_update,
2664         .stats_get            = bond_ethdev_stats_get,
2665         .stats_reset          = bond_ethdev_stats_reset,
2666         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2667         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2668         .reta_update          = bond_ethdev_rss_reta_update,
2669         .reta_query           = bond_ethdev_rss_reta_query,
2670         .rss_hash_update      = bond_ethdev_rss_hash_update,
2671         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2672 };
2673
2674 static int
2675 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2676 {
2677         const char *name = rte_vdev_device_name(dev);
2678         uint8_t socket_id = dev->device.numa_node;
2679         struct bond_dev_private *internals = NULL;
2680         struct rte_eth_dev *eth_dev = NULL;
2681         uint32_t vlan_filter_bmp_size;
2682
2683         /* now do all the data allocation - for the eth_dev structure
2684          * and the internal (private) data
2685          */
2686
2687         /* reserve an ethdev entry */
2688         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2689         if (eth_dev == NULL) {
2690                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2691                 goto err;
2692         }
2693
2694         internals = eth_dev->data->dev_private;
2695         eth_dev->data->nb_rx_queues = (uint16_t)1;
2696         eth_dev->data->nb_tx_queues = (uint16_t)1;
2697
2698         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2699                         socket_id);
2700         if (eth_dev->data->mac_addrs == NULL) {
2701                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2702                 goto err;
2703         }
2704
2705         eth_dev->dev_ops = &default_dev_ops;
2706         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
2707                 RTE_ETH_DEV_DETACHABLE;
2708
2709         rte_spinlock_init(&internals->lock);
2710
2711         internals->port_id = eth_dev->data->port_id;
2712         internals->mode = BONDING_MODE_INVALID;
2713         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2714         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2715         internals->xmit_hash = xmit_l2_hash;
2716         internals->user_defined_mac = 0;
2717         internals->link_props_set = 0;
2718
2719         internals->link_status_polling_enabled = 0;
2720
2721         internals->link_status_polling_interval_ms =
2722                 DEFAULT_POLLING_INTERVAL_10_MS;
2723         internals->link_down_delay_ms = 0;
2724         internals->link_up_delay_ms = 0;
2725
2726         internals->slave_count = 0;
2727         internals->active_slave_count = 0;
2728         internals->rx_offload_capa = 0;
2729         internals->tx_offload_capa = 0;
2730         internals->candidate_max_rx_pktlen = 0;
2731         internals->max_rx_pktlen = 0;
2732
2733         /* Initially allow choosing any RSS offload type */
2734         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2735
2736         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2737         memset(internals->slaves, 0, sizeof(internals->slaves));
2738
2739         /* Set mode 4 default configuration */
2740         bond_mode_8023ad_setup(eth_dev, NULL);
2741         if (bond_ethdev_mode_set(eth_dev, mode)) {
2742                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2743                                  eth_dev->data->port_id, mode);
2744                 goto err;
2745         }
2746
2747         vlan_filter_bmp_size =
2748                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2749         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2750                                                    RTE_CACHE_LINE_SIZE);
2751         if (internals->vlan_filter_bmpmem == NULL) {
2752                 RTE_BOND_LOG(ERR,
2753                              "Failed to allocate vlan bitmap for bonded device %u\n",
2754                              eth_dev->data->port_id);
2755                 goto err;
2756         }
2757
2758         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2759                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2760         if (internals->vlan_filter_bmp == NULL) {
2761                 RTE_BOND_LOG(ERR,
2762                              "Failed to init vlan bitmap for bonded device %u\n",
2763                              eth_dev->data->port_id);
2764                 rte_free(internals->vlan_filter_bmpmem);
2765                 goto err;
2766         }
2767
2768         return eth_dev->data->port_id;
2769
2770 err:
2771         rte_free(internals);
2772         if (eth_dev != NULL) {
2773                 rte_free(eth_dev->data->mac_addrs);
2774                 rte_eth_dev_release_port(eth_dev);
2775         }
2776         return -1;
2777 }
2778
2779 static int
2780 bond_probe(struct rte_vdev_device *dev)
2781 {
2782         const char *name;
2783         struct bond_dev_private *internals;
2784         struct rte_kvargs *kvlist;
2785         uint8_t bonding_mode, socket_id;
2786         int  arg_count, port_id;
2787
2788         if (!dev)
2789                 return -EINVAL;
2790
2791         name = rte_vdev_device_name(dev);
2792         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2793
2794         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2795                 pmd_bond_init_valid_arguments);
2796         if (kvlist == NULL)
2797                 return -1;
2798
2799         /* Parse link bonding mode */
2800         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2801                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2802                                 &bond_ethdev_parse_slave_mode_kvarg,
2803                                 &bonding_mode) != 0) {
2804                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2805                                         name);
2806                         goto parse_error;
2807                 }
2808         } else {
2809                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2810                                 "device %s\n", name);
2811                 goto parse_error;
2812         }
2813
2814         /* Parse socket id to create bonding device on */
2815         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2816         if (arg_count == 1) {
2817                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2818                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2819                                 != 0) {
2820                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2821                                         "bonded device %s\n", name);
2822                         goto parse_error;
2823                 }
2824         } else if (arg_count > 1) {
2825                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2826                                 "bonded device %s\n", name);
2827                 goto parse_error;
2828         } else {
2829                 socket_id = rte_socket_id();
2830         }
2831
2832         dev->device.numa_node = socket_id;
2833
2834         /* Create link bonding eth device */
2835         port_id = bond_alloc(dev, bonding_mode);
2836         if (port_id < 0) {
2837                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2838                                 "socket %u.\n", name, bonding_mode, socket_id);
2839                 goto parse_error;
2840         }
2841         internals = rte_eth_devices[port_id].data->dev_private;
2842         internals->kvlist = kvlist;
2843
2844         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2845                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2846         return 0;
2847
2848 parse_error:
2849         rte_kvargs_free(kvlist);
2850
2851         return -1;
2852 }
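/*
 * Illustrative sketch, not part of the driver: besides the --vdev devargs path
 * handled by bond_probe() above, a bonded device can be created at run time
 * through the API declared in rte_eth_bond.h (this is the "no kvlist" case
 * handled later in bond_ethdev_configure()).  The slave port ids below are
 * assumptions made for the example only.
 *
 *	int bond_port_id;
 *
 *	bond_port_id = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	if (bond_port_id < 0)
 *		rte_exit(EXIT_FAILURE, "Cannot create bonded device\n");
 *
 *	rte_eth_bond_slave_add(bond_port_id, 0);
 *	rte_eth_bond_slave_add(bond_port_id, 1);
 *	rte_eth_bond_primary_set(bond_port_id, 0);
 */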
2853
2854 static int
2855 bond_remove(struct rte_vdev_device *dev)
2856 {
2857         struct rte_eth_dev *eth_dev;
2858         struct bond_dev_private *internals;
2859         const char *name;
2860
2861         if (!dev)
2862                 return -EINVAL;
2863
2864         name = rte_vdev_device_name(dev);
2865         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2866
2867         /* now free all the data allocations - for the eth_dev structure
2868          * and the internal (private) data
2869          */
2870
2871         /* find an ethdev entry */
2872         eth_dev = rte_eth_dev_allocated(name);
2873         if (eth_dev == NULL)
2874                 return -ENODEV;
2875
2876         RTE_ASSERT(eth_dev->device == &dev->device);
2877
2878         internals = eth_dev->data->dev_private;
2879         if (internals->slave_count != 0)
2880                 return -EBUSY;
2881
2882         if (eth_dev->data->dev_started == 1) {
2883                 bond_ethdev_stop(eth_dev);
2884                 bond_ethdev_close(eth_dev);
2885         }
2886
2887         eth_dev->dev_ops = NULL;
2888         eth_dev->rx_pkt_burst = NULL;
2889         eth_dev->tx_pkt_burst = NULL;
2890
2891         internals = eth_dev->data->dev_private;
2892         rte_bitmap_free(internals->vlan_filter_bmp);
2893         rte_free(internals->vlan_filter_bmpmem);
2894         rte_free(eth_dev->data->dev_private);
2895         rte_free(eth_dev->data->mac_addrs);
2896
2897         rte_eth_dev_release_port(eth_dev);
2898
2899         return 0;
2900 }
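/*
 * Illustrative sketch, not part of the driver: bond_remove() above refuses to
 * tear the device down while slaves are still attached (-EBUSY), so an
 * application detaches them first.  Port ids are assumptions for the example.
 *
 *	rte_eth_bond_slave_remove(bond_port_id, 0);
 *	rte_eth_bond_slave_remove(bond_port_id, 1);
 *	rte_eth_bond_free("net_bonding0");
 */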
2901
2902 /* This function resolves the slave port ids only after all the other physical
2903  * and virtual devices have been allocated. */
2904 static int
2905 bond_ethdev_configure(struct rte_eth_dev *dev)
2906 {
2907         const char *name = dev->device->name;
2908         struct bond_dev_private *internals = dev->data->dev_private;
2909         struct rte_kvargs *kvlist = internals->kvlist;
2910         int arg_count;
2911         uint8_t port_id = dev - rte_eth_devices;
2912
2913         static const uint8_t default_rss_key[40] = {
2914                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2915                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2916                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2917                 0xBE, 0xAC, 0x01, 0xFA
2918         };
2919
2920         unsigned i, j;
2921
2922         /* If RSS is enabled, fill table and key with default values */
2923         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2924                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2925                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2926                 memcpy(internals->rss_key, default_rss_key, 40);
2927
2928                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2929                         internals->reta_conf[i].mask = ~0LL;
2930                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2931                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2932                 }
2933         }
2934
2935         /* set the max_rx_pktlen */
2936         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2937
2938         /*
2939          * if no kvlist, it means that this bonded device has been created
2940          * through the bonding api.
2941          */
2942         if (!kvlist)
2943                 return 0;
2944
2945         /* Parse MAC address for bonded device */
2946         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2947         if (arg_count == 1) {
2948                 struct ether_addr bond_mac;
2949
2950                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2951                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2952                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2953                                         name);
2954                         return -1;
2955                 }
2956
2957                 /* Set MAC address */
2958                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2959                         RTE_LOG(ERR, EAL,
2960                                         "Failed to set mac address on bonded device %s\n",
2961                                         name);
2962                         return -1;
2963                 }
2964         } else if (arg_count > 1) {
2965                 RTE_LOG(ERR, EAL,
2966                                 "MAC address can be specified only once for bonded device %s\n",
2967                                 name);
2968                 return -1;
2969         }
2970
2971         /* Parse/set balance mode transmit policy */
2972         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2973         if (arg_count == 1) {
2974                 uint8_t xmit_policy;
2975
2976                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2977                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2978                                                 0) {
2979                         RTE_LOG(INFO, EAL,
2980                                         "Invalid xmit policy specified for bonded device %s\n",
2981                                         name);
2982                         return -1;
2983                 }
2984
2985                 /* Set balance mode transmit policy */
2986                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2987                         RTE_LOG(ERR, EAL,
2988                                         "Failed to set balance xmit policy on bonded device %s\n",
2989                                         name);
2990                         return -1;
2991                 }
2992         } else if (arg_count > 1) {
2993                 RTE_LOG(ERR, EAL,
2994                                 "Transmit policy can be specified only once for bonded device"
2995                                 " %s\n", name);
2996                 return -1;
2997         }
2998
2999         /* Parse/add slave ports to bonded device */
3000         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3001                 struct bond_ethdev_slave_ports slave_ports;
3002                 unsigned i;
3003
3004                 memset(&slave_ports, 0, sizeof(slave_ports));
3005
3006                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3007                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3008                         RTE_LOG(ERR, EAL,
3009                                         "Failed to parse slave ports for bonded device %s\n",
3010                                         name);
3011                         return -1;
3012                 }
3013
3014                 for (i = 0; i < slave_ports.slave_count; i++) {
3015                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3016                                 RTE_LOG(ERR, EAL,
3017                                                 "Failed to add port %d as slave to bonded device %s\n",
3018                                                 slave_ports.slaves[i], name);
3019                         }
3020                 }
3021
3022         } else {
3023                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3024                 return -1;
3025         }
3026
3027         /* Parse/set primary slave port id */
3028         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3029         if (arg_count == 1) {
3030                 uint8_t primary_slave_port_id;
3031
3032                 if (rte_kvargs_process(kvlist,
3033                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3034                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3035                                 &primary_slave_port_id) < 0) {
3036                         RTE_LOG(INFO, EAL,
3037                                         "Invalid primary slave port id specified for bonded device"
3038                                         " %s\n", name);
3039                         return -1;
3040                 }
3041
3042                 /* Set primary slave port id */
3043                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
3044                                 != 0) {
3045                         RTE_LOG(ERR, EAL,
3046                                         "Failed to set primary slave port %d on bonded device %s\n",
3047                                         primary_slave_port_id, name);
3048                         return -1;
3049                 }
3050         } else if (arg_count > 1) {
3051                 RTE_LOG(INFO, EAL,
3052                                 "Primary slave can be specified only once for bonded device"
3053                                 " %s\n", name);
3054                 return -1;
3055         }
3056
3057         /* Parse link status monitor polling interval */
3058         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3059         if (arg_count == 1) {
3060                 uint32_t lsc_poll_interval_ms;
3061
3062                 if (rte_kvargs_process(kvlist,
3063                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3064                                 &bond_ethdev_parse_time_ms_kvarg,
3065                                 &lsc_poll_interval_ms) < 0) {
3066                         RTE_LOG(INFO, EAL,
3067                                         "Invalid lsc polling interval value specified for bonded"
3068                                         " device %s\n", name);
3069                         return -1;
3070                 }
3071
3072                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3073                                 != 0) {
3074                         RTE_LOG(ERR, EAL,
3075                                         "Failed to set lsc monitor polling interval (%u ms) on"
3076                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3077                         return -1;
3078                 }
3079         } else if (arg_count > 1) {
3080                 RTE_LOG(INFO, EAL,
3081                                 "LSC polling interval can be specified only once for bonded"
3082                                 " device %s\n", name);
3083                 return -1;
3084         }
3085
3086         /* Parse link up interrupt propagation delay */
3087         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3088         if (arg_count == 1) {
3089                 uint32_t link_up_delay_ms;
3090
3091                 if (rte_kvargs_process(kvlist,
3092                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3093                                 &bond_ethdev_parse_time_ms_kvarg,
3094                                 &link_up_delay_ms) < 0) {
3095                         RTE_LOG(INFO, EAL,
3096                                         "Invalid link up propagation delay value specified for"
3097                                         " bonded device %s\n", name);
3098                         return -1;
3099                 }
3100
3101                 /* Set link up propagation delay */
3102                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3103                                 != 0) {
3104                         RTE_LOG(ERR, EAL,
3105                                         "Failed to set link up propagation delay (%u ms) on bonded"
3106                                         " device %s\n", link_up_delay_ms, name);
3107                         return -1;
3108                 }
3109         } else if (arg_count > 1) {
3110                 RTE_LOG(INFO, EAL,
3111                                 "Link up propagation delay can be specified only once for"
3112                                 " bonded device %s\n", name);
3113                 return -1;
3114         }
3115
3116         /* Parse link down interrupt propagation delay */
3117         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3118         if (arg_count == 1) {
3119                 uint32_t link_down_delay_ms;
3120
3121                 if (rte_kvargs_process(kvlist,
3122                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3123                                 &bond_ethdev_parse_time_ms_kvarg,
3124                                 &link_down_delay_ms) < 0) {
3125                         RTE_LOG(INFO, EAL,
3126                                         "Invalid link down propagation delay value specified for"
3127                                         " bonded device %s\n", name);
3128                         return -1;
3129                 }
3130
3131                 /* Set link down propagation delay */
3132                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3133                                 != 0) {
3134                         RTE_LOG(ERR, EAL,
3135                                         "Failed to set link down propagation delay (%u ms) on"
3136                                         " bonded device %s\n", link_down_delay_ms, name);
3137                         return -1;
3138                 }
3139         } else if (arg_count > 1) {
3140                 RTE_LOG(INFO, EAL,
3141                                 "Link down propagation delay can be specified only once for"
3142                                 " bonded device %s\n", name);
3143                 return -1;
3144         }
3145
3146         return 0;
3147 }
3148
3149 struct rte_vdev_driver pmd_bond_drv = {
3150         .probe = bond_probe,
3151         .remove = bond_remove,
3152 };
3153
3154 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3155 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3156
3157 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3158         "slave=<ifc> "
3159         "primary=<ifc> "
3160         "mode=[0-6] "
3161         "xmit_policy=[l2 | l23 | l34] "
3162         "socket_id=<int> "
3163         "mac=<mac addr> "
3164         "lsc_poll_period_ms=<int> "
3165         "up_delay=<int> "
3166         "down_delay=<int>");
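/*
 * Illustrative example, not part of the driver: the parameters registered
 * above are passed on the EAL command line when the bonded device is created
 * as a virtual device.  The PCI addresses and values are assumptions made for
 * the example only.
 *
 *	--vdev 'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:03:00.0,primary=0000:02:00.0,socket_id=0'
 */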