1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20
21 #include "rte_eth_bond.h"
22 #include "rte_eth_bond_private.h"
23 #include "rte_eth_bond_8023ad_private.h"
24
25 #define REORDER_PERIOD_MS 10
26 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
27
28 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
29
30 /* Table for statistics in mode 5 TLB */
31 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
32
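/*
 * Return the offset, in bytes, from the end of the Ethernet header to the
 * L3 header, skipping up to two stacked VLAN (QinQ) tags. On return *proto
 * holds the first non-VLAN ethertype (in network byte order), as used by
 * the xmit hash helpers below.
 */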
33 static inline size_t
34 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
35 {
36         size_t vlan_offset = 0;
37
38         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
39                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
40
41                 vlan_offset = sizeof(struct vlan_hdr);
42                 *proto = vlan_hdr->eth_proto;
43
44                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
45                         vlan_hdr = vlan_hdr + 1;
46                         *proto = vlan_hdr->eth_proto;
47                         vlan_offset += sizeof(struct vlan_hdr);
48                 }
49         }
50         return vlan_offset;
51 }
52
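/*
 * Basic burst RX: poll each active slave in turn, appending packets to
 * *bufs until nb_pkts mbufs are gathered or every slave has been polled.
 */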
53 static uint16_t
54 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
55 {
56         struct bond_dev_private *internals;
57
58         uint16_t num_rx_slave = 0;
59         uint16_t num_rx_total = 0;
60
61         int i;
62
63         /* Cast to structure containing the bonded device's port id and queue id */
64         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
65
66         internals = bd_rx_q->dev_private;
67
68
69         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
70                 /* Offset of pointer to *bufs increases as packets are received
71                  * from other slaves */
72                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
73                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
74                 if (num_rx_slave) {
75                         num_rx_total += num_rx_slave;
76                         nb_pkts -= num_rx_slave;
77                 }
78         }
79
80         return num_rx_total;
81 }
82
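/* Burst RX for active-backup mode: receive only from the current primary. */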
83 static uint16_t
84 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
85                 uint16_t nb_pkts)
86 {
87         struct bond_dev_private *internals;
88
89         /* Cast to structure containing the bonded device's port id and queue id */
90         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
91
92         internals = bd_rx_q->dev_private;
93
94         return rte_eth_rx_burst(internals->current_primary_port,
95                         bd_rx_q->queue_id, bufs, nb_pkts);
96 }
97
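/*
 * Return true if the frame is an untagged slow protocol frame, i.e. an
 * ETHER_TYPE_SLOW packet carrying a LACP or marker PDU.
 */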
98 static inline uint8_t
99 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
100 {
101         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
102
103         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
104                 (ethertype == ether_type_slow_be &&
105                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
106 }
107
108 /*****************************************************************************
109  * Flow director setup for the mode 4 dedicated queue optimization
110  */
111
112 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
113         .dst.addr_bytes = { 0 },
114         .src.addr_bytes = { 0 },
115         .type = RTE_BE16(ETHER_TYPE_SLOW),
116 };
117
118 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
119         .dst.addr_bytes = { 0 },
120         .src.addr_bytes = { 0 },
121         .type = RTE_BE16(0xFFFF),
122 };
123
124 static struct rte_flow_item flow_item_8023ad[] = {
125         {
126                 .type = RTE_FLOW_ITEM_TYPE_ETH,
127                 .spec = &flow_item_eth_type_8023ad,
128                 .last = NULL,
129                 .mask = &flow_item_eth_mask_type_8023ad,
130         },
131         {
132                 .type = RTE_FLOW_ITEM_TYPE_END,
133                 .spec = NULL,
134                 .last = NULL,
135                 .mask = NULL,
136         }
137 };
138
139 const struct rte_flow_attr flow_attr_8023ad = {
140         .group = 0,
141         .priority = 0,
142         .ingress = 1,
143         .egress = 0,
144         .reserved = 0,
145 };
146
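/*
 * Check that a flow rule steering slow protocol frames to a dedicated
 * queue could be created on the slave, and that the slave can provide the
 * extra RX/TX queues required by the dedicated queue optimization.
 */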
147 int
148 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
149                 uint16_t slave_port) {
150         struct rte_eth_dev_info slave_info;
151         struct rte_flow_error error;
152         struct bond_dev_private *internals = (struct bond_dev_private *)
153                         (bond_dev->data->dev_private);
154
155         const struct rte_flow_action_queue lacp_queue_conf = {
156                 .index = 0,
157         };
158
159         const struct rte_flow_action actions[] = {
160                 {
161                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
162                         .conf = &lacp_queue_conf
163                 },
164                 {
165                         .type = RTE_FLOW_ACTION_TYPE_END,
166                 }
167         };
168
169         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
170                         flow_item_8023ad, actions, &error);
171         if (ret < 0) {
172                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
173                                 __func__, error.message, slave_port,
174                                 internals->mode4.dedicated_queues.rx_qid);
175                 return -1;
176         }
177
178         rte_eth_dev_info_get(slave_port, &slave_info);
179         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
180                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
181                 RTE_BOND_LOG(ERR,
182                         "%s: Slave %d capabilities don't allow allocating additional queues",
183                         __func__, slave_port);
184                 return -1;
185         }
186
187         return 0;
188 }
189
190 int
191 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
192         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
193         struct bond_dev_private *internals = (struct bond_dev_private *)
194                         (bond_dev->data->dev_private);
195         struct rte_eth_dev_info bond_info;
196         uint16_t idx;
197
198         /* Verify that all slaves in the bonding device support the flow rules needed for the dedicated queues */
199         if (internals->slave_count > 0) {
200                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
201
202                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
203                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
204
205                 for (idx = 0; idx < internals->slave_count; idx++) {
206                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
207                                         internals->slaves[idx].port_id) != 0)
208                                 return -1;
209                 }
210         }
211
212         return 0;
213 }
214
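/*
 * Create the flow rule that redirects slow protocol frames received on
 * the slave into the dedicated mode 4 RX queue.
 */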
215 int
216 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
217
218         struct rte_flow_error error;
219         struct bond_dev_private *internals = (struct bond_dev_private *)
220                         (bond_dev->data->dev_private);
221
222         struct rte_flow_action_queue lacp_queue_conf = {
223                 .index = internals->mode4.dedicated_queues.rx_qid,
224         };
225
226         const struct rte_flow_action actions[] = {
227                 {
228                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
229                         .conf = &lacp_queue_conf
230                 },
231                 {
232                         .type = RTE_FLOW_ACTION_TYPE_END,
233                 }
234         };
235
236         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
237                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
238         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
239                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
240                                 "(slave_port=%d queue_id=%d)",
241                                 error.message, slave_port,
242                                 internals->mode4.dedicated_queues.rx_qid);
243                 return -1;
244         }
245
246         return 0;
247 }
248
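/*
 * Burst RX for mode 4 with dedicated queues: slow frames are steered to a
 * separate queue by the flow rule above, so packets can be read from the
 * active slaves in round-robin order without inspecting them.
 */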
249 static uint16_t
250 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
251                 uint16_t nb_pkts)
252 {
253         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
254         struct bond_dev_private *internals = bd_rx_q->dev_private;
255         uint16_t num_rx_total = 0;      /* Total number of received packets */
256         uint16_t slaves[RTE_MAX_ETHPORTS];
257         uint16_t slave_count;
258
259         uint16_t i, idx;
260
261         /* Copy slave list to protect against slave up/down changes during rx
262          * bursting */
263         slave_count = internals->active_slave_count;
264         memcpy(slaves, internals->active_slaves,
265                         sizeof(internals->active_slaves[0]) * slave_count);
266
267         for (i = 0, idx = internals->active_slave;
268                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
269                 idx = idx % slave_count;
270
271                 /* Read packets from this slave */
272                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
273                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
274         }
275
276         internals->active_slave = idx;
277
278         return num_rx_total;
279 }
280
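/*
 * Burst TX for mode 4 with dedicated queues: hash each packet onto one of
 * the slaves currently in DISTRIBUTING state and burst the per-slave
 * arrays, moving any unsent packets back to the tail of bufs.
 */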
281 static uint16_t
282 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
283                 uint16_t nb_pkts)
284 {
285         struct bond_dev_private *internals;
286         struct bond_tx_queue *bd_tx_q;
287
288         uint16_t num_of_slaves;
289         uint16_t slaves[RTE_MAX_ETHPORTS];
290         /* positions in the slaves array, not port IDs */
291         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
292         uint8_t distributing_count;
293
294         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
295         uint16_t i, op_slave_idx;
296
297         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
298
299         /* Total number of packets in slave_bufs */
300         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
301
302
303         if (unlikely(nb_pkts == 0))
304                 return 0;
305
306         bd_tx_q = (struct bond_tx_queue *)queue;
307         internals = bd_tx_q->dev_private;
308
309         /* Copy slave list to protect against slave up/down changes during tx
310          * bursting */
311         num_of_slaves = internals->active_slave_count;
312         if (num_of_slaves < 1)
313                 return num_tx_total;
314
315         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
316                         num_of_slaves);
317
318         distributing_count = 0;
319         for (i = 0; i < num_of_slaves; i++) {
320                 struct port *port = &mode_8023ad_ports[slaves[i]];
321                 if (ACTOR_STATE(port, DISTRIBUTING))
322                         distributing_offsets[distributing_count++] = i;
323         }
324
325         if (likely(distributing_count > 0)) {
326                 /* Populate slave mbuf arrays with the packets to be sent */
327                 for (i = 0; i < nb_pkts; i++) {
328                         /* Select output slave using hash based on xmit policy */
329                         op_slave_idx = internals->xmit_hash(bufs[i],
330                                         distributing_count);
331
332                         /* Populate slave mbuf arrays with mbufs for that slave.
333                          * Use only slaves that are currently distributing.
334                          */
335                         uint8_t slave_offset =
336                                         distributing_offsets[op_slave_idx];
337                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
338                                         bufs[i];
339                         slave_nb_pkts[slave_offset]++;
340                 }
341         }
342
343         /* Send packet burst on each slave device */
344         for (i = 0; i < num_of_slaves; i++) {
345                 if (slave_nb_pkts[i] == 0)
346                         continue;
347
348                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
349                                 slave_bufs[i], slave_nb_pkts[i]);
350
351                 num_tx_total += num_tx_slave;
352                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
353
354                 /* If tx burst fails move packets to end of bufs */
355                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
356                         uint16_t j = nb_pkts - num_tx_fail_total;
357                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
358                                         num_tx_slave++)
359                                 bufs[j] = slave_bufs[i][num_tx_slave];
360                 }
361         }
362
363         return num_tx_total;
364 }
365
366
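/*
 * Burst RX for mode 4 without dedicated queues: read from the active
 * slaves in round-robin order, divert slow protocol frames to the 802.3ad
 * state machine and drop packets the bond must not deliver.
 */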
367 static uint16_t
368 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
369                 uint16_t nb_pkts)
370 {
371         /* Cast to structure containing the bonded device's port id and queue id */
372         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
373         struct bond_dev_private *internals = bd_rx_q->dev_private;
374         struct ether_addr bond_mac;
375
376         struct ether_hdr *hdr;
377
378         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
379         uint16_t num_rx_total = 0;      /* Total number of received packets */
380         uint16_t slaves[RTE_MAX_ETHPORTS];
381         uint16_t slave_count, idx;
382
383         uint8_t collecting;  /* current slave collecting status */
384         const uint8_t promisc = internals->promiscuous_en;
385         uint8_t i, j, k;
386         uint8_t subtype;
387
388         rte_eth_macaddr_get(internals->port_id, &bond_mac);
389         /* Copy slave list to protect against slave up/down changes during rx
390          * bursting */
391         slave_count = internals->active_slave_count;
392         memcpy(slaves, internals->active_slaves,
393                         sizeof(internals->active_slaves[0]) * slave_count);
394
395         idx = internals->active_slave;
396         if (idx >= slave_count) {
397                 internals->active_slave = 0;
398                 idx = 0;
399         }
400         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
401                 j = num_rx_total;
402                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
403                                          COLLECTING);
404
405                 /* Read packets from this slave */
406                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
407                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
408
409                 for (k = j; k < 2 && k < num_rx_total; k++)
410                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
411
412                 /* Handle slow protocol packets. */
413                 while (j < num_rx_total) {
414
415                         /* A packet with a known type beyond pure L2 cannot be a slow frame, skip it */
416                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
417                                 j++;
418                                 continue;
419                         }
420
421                         if (j + 3 < num_rx_total)
422                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
423
424                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
425                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
426
427                         /* Remove the packet from the array if it is a slow packet, the slave
428                          * is not in collecting state, or the bond is not in promiscuous mode
429                          * and the destination MAC matches neither multicast nor the bond MAC. */
430                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
431                                 !collecting || (!promisc &&
432                                         !is_multicast_ether_addr(&hdr->d_addr) &&
433                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
434
435                                 if (hdr->ether_type == ether_type_slow_be) {
436                                         bond_mode_8023ad_handle_slow_pkt(
437                                             internals, slaves[idx], bufs[j]);
438                                 } else
439                                         rte_pktmbuf_free(bufs[j]);
440
441                                 /* Packet is managed by mode 4 or dropped, shift the array */
442                                 num_rx_total--;
443                                 if (j < num_rx_total) {
444                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
445                                                 (num_rx_total - j));
446                                 }
447                         } else
448                                 j++;
449                 }
450                 if (unlikely(++idx == slave_count))
451                         idx = 0;
452         }
453
454         internals->active_slave = idx;
455         return num_rx_total;
456 }
457
458 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
459 uint32_t burstnumberRX;
460 uint32_t burstnumberTX;
461
462 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
463
464 static void
465 arp_op_name(uint16_t arp_op, char *buf)
466 {
467         switch (arp_op) {
468         case ARP_OP_REQUEST:
469                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
470                 return;
471         case ARP_OP_REPLY:
472                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
473                 return;
474         case ARP_OP_REVREQUEST:
475                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
476                                 "Reverse ARP Request");
477                 return;
478         case ARP_OP_REVREPLY:
479                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
480                                 "Reverse ARP Reply");
481                 return;
482         case ARP_OP_INVREQUEST:
483                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
484                                 "Peer Identify Request");
485                 return;
486         case ARP_OP_INVREPLY:
487                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
488                                 "Peer Identify Reply");
489                 return;
490         default:
491                 break;
492         }
493         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
494         return;
495 }
496 #endif
497 #define MaxIPv4String   16
498 static void
499 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
500 {
501         uint32_t ipv4_addr;
502
503         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
504         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
505                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
506                 ipv4_addr & 0xFF);
507 }
508
509 #define MAX_CLIENTS_NUMBER      128
510 uint8_t active_clients;
511 struct client_stats_t {
512         uint16_t port;
513         uint32_t ipv4_addr;
514         uint32_t ipv4_rx_packets;
515         uint32_t ipv4_tx_packets;
516 };
517 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
518
519 static void
520 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
521 {
522         int i = 0;
523
524         for (; i < MAX_CLIENTS_NUMBER; i++)     {
525                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
526                         /* Just update RX packets number for this client */
527                         if (TXorRXindicator == &burstnumberRX)
528                                 client_stats[i].ipv4_rx_packets++;
529                         else
530                                 client_stats[i].ipv4_tx_packets++;
531                         return;
532                 }
533         }
534         /* We have a new client. Insert it into the table and update its stats */
535         if (TXorRXindicator == &burstnumberRX)
536                 client_stats[active_clients].ipv4_rx_packets++;
537         else
538                 client_stats[active_clients].ipv4_tx_packets++;
539         client_stats[active_clients].ipv4_addr = addr;
540         client_stats[active_clients].port = port;
541         active_clients++;
542
543 }
544
545 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
546 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
547                 RTE_LOG(DEBUG, PMD, \
548                 "%s " \
549                 "port:%d " \
550                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
551                 "SrcIP:%s " \
552                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
553                 "DstIP:%s " \
554                 "%s " \
555                 "%d\n", \
556                 info, \
557                 port, \
558                 eth_h->s_addr.addr_bytes[0], \
559                 eth_h->s_addr.addr_bytes[1], \
560                 eth_h->s_addr.addr_bytes[2], \
561                 eth_h->s_addr.addr_bytes[3], \
562                 eth_h->s_addr.addr_bytes[4], \
563                 eth_h->s_addr.addr_bytes[5], \
564                 src_ip, \
565                 eth_h->d_addr.addr_bytes[0], \
566                 eth_h->d_addr.addr_bytes[1], \
567                 eth_h->d_addr.addr_bytes[2], \
568                 eth_h->d_addr.addr_bytes[3], \
569                 eth_h->d_addr.addr_bytes[4], \
570                 eth_h->d_addr.addr_bytes[5], \
571                 dst_ip, \
572                 arp_op, \
573                 ++burstnumber)
574 #endif
575
576 static void
577 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
578                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
579 {
580         struct ipv4_hdr *ipv4_h;
581 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
582         struct arp_hdr *arp_h;
583         char dst_ip[16];
584         char ArpOp[24];
585         char buf[16];
586 #endif
587         char src_ip[16];
588
589         uint16_t ether_type = eth_h->ether_type;
590         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
591
592 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
593         snprintf(buf, 16, "%s", info);
594 #endif
595
596         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
597                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
598                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
599 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
600                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
601                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
602 #endif
603                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
604         }
605 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
606         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
607                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
608                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
609                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
610                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
611                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
612         }
613 #endif
614 }
615 #endif
616
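/*
 * Burst RX for mode 6 (ALB): receive as usual, then feed any ARP frames
 * to the ALB logic so the client table stays current.
 */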
617 static uint16_t
618 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
619 {
620         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
621         struct bond_dev_private *internals = bd_tx_q->dev_private;
622         struct ether_hdr *eth_h;
623         uint16_t ether_type, offset;
624         uint16_t nb_recv_pkts;
625         int i;
626
627         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
628
629         for (i = 0; i < nb_recv_pkts; i++) {
630                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
631                 ether_type = eth_h->ether_type;
632                 offset = get_vlan_offset(eth_h, &ether_type);
633
634                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
635 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
636                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
637 #endif
638                         bond_mode_alb_arp_recv(eth_h, offset, internals);
639                 }
640 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
641                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
642                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
643 #endif
644         }
645
646         return nb_recv_pkts;
647 }
648
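/*
 * Burst TX for round-robin mode: spread the burst evenly across the
 * active slaves, remembering (in a static index) where the next call
 * should start.
 */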
649 static uint16_t
650 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
651                 uint16_t nb_pkts)
652 {
653         struct bond_dev_private *internals;
654         struct bond_tx_queue *bd_tx_q;
655
656         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
657         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
658
659         uint16_t num_of_slaves;
660         uint16_t slaves[RTE_MAX_ETHPORTS];
661
662         uint16_t num_tx_total = 0, num_tx_slave;
663
664         static int slave_idx = 0;
665         int i, cslave_idx = 0, tx_fail_total = 0;
666
667         bd_tx_q = (struct bond_tx_queue *)queue;
668         internals = bd_tx_q->dev_private;
669
670         /* Copy slave list to protect against slave up/down changes during tx
671          * bursting */
672         num_of_slaves = internals->active_slave_count;
673         memcpy(slaves, internals->active_slaves,
674                         sizeof(internals->active_slaves[0]) * num_of_slaves);
675
676         if (num_of_slaves < 1)
677                 return num_tx_total;
678
679         /* Populate slave mbuf arrays with the packets to be sent on each slave */
680         for (i = 0; i < nb_pkts; i++) {
681                 cslave_idx = (slave_idx + i) % num_of_slaves;
682                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
683         }
684
685         /* increment current slave index so the next call to tx burst starts on the
686          * next slave */
687         slave_idx = ++cslave_idx;
688
689         /* Send packet burst on each slave device */
690         for (i = 0; i < num_of_slaves; i++) {
691                 if (slave_nb_pkts[i] > 0) {
692                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
693                                         slave_bufs[i], slave_nb_pkts[i]);
694
695                         /* if tx burst fails move packets to end of bufs */
696                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
697                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
698
699                                 tx_fail_total += tx_fail_slave;
700
701                                 memcpy(&bufs[nb_pkts - tx_fail_total],
702                                                 &slave_bufs[i][num_tx_slave],
703                                                 tx_fail_slave * sizeof(bufs[0]));
704                         }
705                         num_tx_total += num_tx_slave;
706                 }
707         }
708
709         return num_tx_total;
710 }
711
712 static uint16_t
713 bond_ethdev_tx_burst_active_backup(void *queue,
714                 struct rte_mbuf **bufs, uint16_t nb_pkts)
715 {
716         struct bond_dev_private *internals;
717         struct bond_tx_queue *bd_tx_q;
718
719         bd_tx_q = (struct bond_tx_queue *)queue;
720         internals = bd_tx_q->dev_private;
721
722         if (internals->active_slave_count < 1)
723                 return 0;
724
725         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
726                         bufs, nb_pkts);
727 }
728
729 static inline uint16_t
730 ether_hash(struct ether_hdr *eth_hdr)
731 {
732         unaligned_uint16_t *word_src_addr =
733                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
734         unaligned_uint16_t *word_dst_addr =
735                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
736
737         return (word_src_addr[0] ^ word_dst_addr[0]) ^
738                         (word_src_addr[1] ^ word_dst_addr[1]) ^
739                         (word_src_addr[2] ^ word_dst_addr[2]);
740 }
741
742 static inline uint32_t
743 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
744 {
745         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
746 }
747
748 static inline uint32_t
749 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
750 {
751         unaligned_uint32_t *word_src_addr =
752                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
753         unaligned_uint32_t *word_dst_addr =
754                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
755
756         return (word_src_addr[0] ^ word_dst_addr[0]) ^
757                         (word_src_addr[1] ^ word_dst_addr[1]) ^
758                         (word_src_addr[2] ^ word_dst_addr[2]) ^
759                         (word_src_addr[3] ^ word_dst_addr[3]);
760 }
761
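/*
 * Transmit hash policies. One of these is installed in
 * internals->xmit_hash; illustratively, via the public bonding API:
 *
 *   rte_eth_bond_xmit_policy_set(bond_port_id, BALANCE_XMIT_POLICY_LAYER2);
 *
 * xmit_l2_hash: hash on the source and destination MAC addresses only.
 */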
762 uint16_t
763 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
764 {
765         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
766
767         uint32_t hash = ether_hash(eth_hdr);
768
769         return (hash ^= hash >> 8) % slave_count;
770 }
771
772 uint16_t
773 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
774 {
775         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
776         uint16_t proto = eth_hdr->ether_type;
777         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
778         uint32_t hash, l3hash = 0;
779
780         hash = ether_hash(eth_hdr);
781
782         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
783                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
784                                 ((char *)(eth_hdr + 1) + vlan_offset);
785                 l3hash = ipv4_hash(ipv4_hdr);
786
787         } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
788                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
789                                 ((char *)(eth_hdr + 1) + vlan_offset);
790                 l3hash = ipv6_hash(ipv6_hdr);
791         }
792
793         hash = hash ^ l3hash;
794         hash ^= hash >> 16;
795         hash ^= hash >> 8;
796
797         return hash % slave_count;
798 }
799
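/*
 * xmit_l34_hash: hash on the L3 addresses plus the TCP/UDP ports when
 * present; fragmented IPv4 packets fall back to the L3 hash alone.
 */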
800 uint16_t
801 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
802 {
803         struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
804         uint16_t proto = eth_hdr->ether_type;
805         size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
806
807         struct udp_hdr *udp_hdr = NULL;
808         struct tcp_hdr *tcp_hdr = NULL;
809         uint32_t hash, l3hash = 0, l4hash = 0;
810
811         if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
812                 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
813                                 ((char *)(eth_hdr + 1) + vlan_offset);
814                 size_t ip_hdr_offset;
815
816                 l3hash = ipv4_hash(ipv4_hdr);
817
818                 /* there is no L4 header in fragmented packet */
819                 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
820                         ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
821                                         IPV4_IHL_MULTIPLIER;
822
823                         if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
824                                 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
825                                                 ip_hdr_offset);
826                                 l4hash = HASH_L4_PORTS(tcp_hdr);
827                         } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
828                                 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
829                                                 ip_hdr_offset);
830                                 l4hash = HASH_L4_PORTS(udp_hdr);
831                         }
832                 }
833         } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
834                 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
835                                 ((char *)(eth_hdr + 1) + vlan_offset);
836                 l3hash = ipv6_hash(ipv6_hdr);
837
838                 if (ipv6_hdr->proto == IPPROTO_TCP) {
839                         tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
840                         l4hash = HASH_L4_PORTS(tcp_hdr);
841                 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
842                         udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
843                         l4hash = HASH_L4_PORTS(udp_hdr);
844                 }
845         }
846
847         hash = l3hash ^ l4hash;
848         hash ^= hash >> 16;
849         hash ^= hash >> 8;
850
851         return hash % slave_count;
852 }
853
854 struct bwg_slave {
855         uint64_t bwg_left_int;
856         uint64_t bwg_left_remainder;
857         uint8_t slave;
858 };
859
860 void
861 bond_tlb_activate_slave(struct bond_dev_private *internals) {
862         int i;
863
864         for (i = 0; i < internals->active_slave_count; i++) {
865                 tlb_last_obytets[internals->active_slaves[i]] = 0;
866         }
867 }
868
869 static int
870 bandwidth_cmp(const void *a, const void *b)
871 {
872         const struct bwg_slave *bwg_a = a;
873         const struct bwg_slave *bwg_b = b;
874         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
875         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
876                         (int64_t)bwg_a->bwg_left_remainder;
877         if (diff > 0)
878                 return 1;
879         else if (diff < 0)
880                 return -1;
881         else if (diff2 > 0)
882                 return 1;
883         else if (diff2 < 0)
884                 return -1;
885         else
886                 return 0;
887 }
888
889 static void
890 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
891                 struct bwg_slave *bwg_slave)
892 {
893         struct rte_eth_link link_status;
894
895         rte_eth_link_get_nowait(port_id, &link_status);
896         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
897         if (link_bwg == 0)
898                 return;
899         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
900         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
901         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
902 }
903
904 static void
905 bond_ethdev_update_tlb_slave_cb(void *arg)
906 {
907         struct bond_dev_private *internals = arg;
908         struct rte_eth_stats slave_stats;
909         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
910         uint8_t slave_count;
911         uint64_t tx_bytes;
912
913         uint8_t update_stats = 0;
914         uint8_t i, slave_id;
915
916         internals->slave_update_idx++;
917
918
919         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
920                 update_stats = 1;
921
922         for (i = 0; i < internals->active_slave_count; i++) {
923                 slave_id = internals->active_slaves[i];
924                 rte_eth_stats_get(slave_id, &slave_stats);
925                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
926                 bandwidth_left(slave_id, tx_bytes,
927                                 internals->slave_update_idx, &bwg_array[i]);
928                 bwg_array[i].slave = slave_id;
929
930                 if (update_stats) {
931                         tlb_last_obytets[slave_id] = slave_stats.obytes;
932                 }
933         }
934
935         if (update_stats == 1)
936                 internals->slave_update_idx = 0;
937
938         slave_count = i;
939         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
940         for (i = 0; i < slave_count; i++)
941                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
942
943         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
944                         (struct bond_dev_private *)internals);
945 }
946
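/*
 * Burst TX for mode 5 (TLB): rewrite the source MAC of packets carrying
 * the primary MAC to that of the transmitting slave, then try the slaves
 * in the bandwidth-sorted order maintained by
 * bond_ethdev_update_tlb_slave_cb() until the burst is sent.
 */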
947 static uint16_t
948 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
949 {
950         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
951         struct bond_dev_private *internals = bd_tx_q->dev_private;
952
953         struct rte_eth_dev *primary_port =
954                         &rte_eth_devices[internals->primary_port];
955         uint16_t num_tx_total = 0;
956         uint16_t i, j;
957
958         uint16_t num_of_slaves = internals->active_slave_count;
959         uint16_t slaves[RTE_MAX_ETHPORTS];
960
961         struct ether_hdr *ether_hdr;
962         struct ether_addr primary_slave_addr;
963         struct ether_addr active_slave_addr;
964
965         if (num_of_slaves < 1)
966                 return num_tx_total;
967
968         memcpy(slaves, internals->tlb_slaves_order,
969                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
970
971
972         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
973
974         if (nb_pkts > 3) {
975                 for (i = 0; i < 3; i++)
976                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
977         }
978
979         for (i = 0; i < num_of_slaves; i++) {
980                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
981                 for (j = num_tx_total; j < nb_pkts; j++) {
982                         if (j + 3 < nb_pkts)
983                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
984
985                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
986                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
987                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
988 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
989                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
990 #endif
991                 }
992
993                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
994                                 bufs + num_tx_total, nb_pkts - num_tx_total);
995
996                 if (num_tx_total == nb_pkts)
997                         break;
998         }
999
1000         return num_tx_total;
1001 }
1002
1003 void
1004 bond_tlb_disable(struct bond_dev_private *internals)
1005 {
1006         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1007 }
1008
1009 void
1010 bond_tlb_enable(struct bond_dev_private *internals)
1011 {
1012         bond_ethdev_update_tlb_slave_cb(internals);
1013 }
1014
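/*
 * Burst TX for mode 6 (ALB): ARP packets are steered per client via the
 * ALB table, pending ARP update packets are generated when needed, and
 * all other traffic is sent using the TLB policy.
 */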
1015 static uint16_t
1016 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1017 {
1018         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1019         struct bond_dev_private *internals = bd_tx_q->dev_private;
1020
1021         struct ether_hdr *eth_h;
1022         uint16_t ether_type, offset;
1023
1024         struct client_data *client_info;
1025
1026         /*
1027          * We create transmit buffers for every slave and one additional to send
1028          * through TLB. In the worst case every packet will be sent on one port.
1029          */
1030         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1031         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1032
1033         /*
1034          * We create separate transmit buffers for update packets as they won't
1035          * be counted in num_tx_total.
1036          */
1037         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1038         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1039
1040         struct rte_mbuf *upd_pkt;
1041         size_t pkt_size;
1042
1043         uint16_t num_send, num_not_send = 0;
1044         uint16_t num_tx_total = 0;
1045         uint16_t slave_idx;
1046
1047         int i, j;
1048
1049         /* Search tx buffer for ARP packets and forward them to alb */
1050         for (i = 0; i < nb_pkts; i++) {
1051                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1052                 ether_type = eth_h->ether_type;
1053                 offset = get_vlan_offset(eth_h, &ether_type);
1054
1055                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1056                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1057
1058                         /* Change src mac in eth header */
1059                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1060
1061                         /* Add packet to slave tx buffer */
1062                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1063                         slave_bufs_pkts[slave_idx]++;
1064                 } else {
1065                         /* If packet is not ARP, send it with TLB policy */
1066                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1067                                         bufs[i];
1068                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1069                 }
1070         }
1071
1072         /* Update connected client ARP tables */
1073         if (internals->mode6.ntt) {
1074                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1075                         client_info = &internals->mode6.client_table[i];
1076
1077                         if (client_info->in_use) {
1078                                 /* Allocate new packet to send ARP update on current slave */
1079                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1080                                 if (upd_pkt == NULL) {
1081                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1082                                         continue;
1083                                 }
1084                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1085                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1086                                 upd_pkt->data_len = pkt_size;
1087                                 upd_pkt->pkt_len = pkt_size;
1088
1089                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1090                                                 internals);
1091
1092                                 /* Add packet to update tx buffer */
1093                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1094                                 update_bufs_pkts[slave_idx]++;
1095                         }
1096                 }
1097                 internals->mode6.ntt = 0;
1098         }
1099
1100         /* Send ARP packets on proper slaves */
1101         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1102                 if (slave_bufs_pkts[i] > 0) {
1103                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1104                                         slave_bufs[i], slave_bufs_pkts[i]);
1105                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1106                                 bufs[nb_pkts - 1 - num_not_send - j] =
1107                                                 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1108                         }
1109
1110                         num_tx_total += num_send;
1111                         num_not_send += slave_bufs_pkts[i] - num_send;
1112
1113 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1114         /* Print TX stats including update packets */
1115                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1116                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1117                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1118                         }
1119 #endif
1120                 }
1121         }
1122
1123         /* Send update packets on proper slaves */
1124         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1125                 if (update_bufs_pkts[i] > 0) {
1126                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1127                                         update_bufs_pkts[i]);
1128                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1129                                 rte_pktmbuf_free(update_bufs[i][j]);
1130                         }
1131 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1132                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1133                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1134                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1135                         }
1136 #endif
1137                 }
1138         }
1139
1140         /* Send non-ARP packets using tlb policy */
1141         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1142                 num_send = bond_ethdev_tx_burst_tlb(queue,
1143                                 slave_bufs[RTE_MAX_ETHPORTS],
1144                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1145
1146                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1147                         bufs[nb_pkts - 1 - num_not_send - j] = slave_bufs[RTE_MAX_ETHPORTS]
1148                                         [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1149                 }
1150
1151                 num_tx_total += num_send;
1152         }
1153
1154         return num_tx_total;
1155 }
1156
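/*
 * Burst TX for balance mode: hash each packet onto a slave with the
 * configured xmit policy and burst the per-slave arrays, moving unsent
 * packets back to the tail of bufs.
 */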
1157 static uint16_t
1158 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1159                 uint16_t nb_pkts)
1160 {
1161         struct bond_dev_private *internals;
1162         struct bond_tx_queue *bd_tx_q;
1163
1164         uint16_t num_of_slaves;
1165         uint16_t slaves[RTE_MAX_ETHPORTS];
1166
1167         uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
1168
1169         int i, op_slave_id;
1170
1171         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1172         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1173
1174         bd_tx_q = (struct bond_tx_queue *)queue;
1175         internals = bd_tx_q->dev_private;
1176
1177         /* Copy slave list to protect against slave up/down changes during tx
1178          * bursting */
1179         num_of_slaves = internals->active_slave_count;
1180         memcpy(slaves, internals->active_slaves,
1181                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1182
1183         if (num_of_slaves < 1)
1184                 return num_tx_total;
1185
1186         /* Populate slave mbuf arrays with the packets to be sent on each slave */
1187         for (i = 0; i < nb_pkts; i++) {
1188                 /* Select output slave using hash based on xmit policy */
1189                 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
1190
1191                 /* Populate slave mbuf arrays with mbufs for that slave */
1192                 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
1193         }
1194
1195         /* Send packet burst on each slave device */
1196         for (i = 0; i < num_of_slaves; i++) {
1197                 if (slave_nb_pkts[i] > 0) {
1198                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1199                                         slave_bufs[i], slave_nb_pkts[i]);
1200
1201                         /* if tx burst fails move packets to end of bufs */
1202                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1203                                 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
1204
1205                                 tx_fail_total += slave_tx_fail_count;
1206                                 memcpy(&bufs[nb_pkts - tx_fail_total],
1207                                                 &slave_bufs[i][num_tx_slave],
1208                                                 slave_tx_fail_count * sizeof(bufs[0]));
1209                         }
1210
1211                         num_tx_total += num_tx_slave;
1212                 }
1213         }
1214
1215         return num_tx_total;
1216 }
1217
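/*
 * Burst TX for mode 4: first drain the slow protocol frames queued by the
 * 802.3ad state machine for each slave, then distribute the data packets
 * over the slaves currently in DISTRIBUTING state.
 */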
1218 static uint16_t
1219 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1220                 uint16_t nb_pkts)
1221 {
1222         struct bond_dev_private *internals;
1223         struct bond_tx_queue *bd_tx_q;
1224
1225         uint16_t num_of_slaves;
1226         uint16_t slaves[RTE_MAX_ETHPORTS];
1227         /* positions in the slaves array, not port IDs */
1228         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1229         uint8_t distributing_count;
1230
1231         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1232         uint16_t i, j, op_slave_idx;
1233         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
1234
1235         /* Allocate extra room for the slow protocol packets dequeued in 802.3ad mode. */
1236         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
1237         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
1238
1239         /* Total amount of packets in slave_bufs */
1240         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1241         /* Slow packets placed in each slave */
1242         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1243
1244         bd_tx_q = (struct bond_tx_queue *)queue;
1245         internals = bd_tx_q->dev_private;
1246
1247         /* Copy slave list to protect against slave up/down changes during tx
1248          * bursting */
1249         num_of_slaves = internals->active_slave_count;
1250         if (num_of_slaves < 1)
1251                 return num_tx_total;
1252
1253         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1254
1255         distributing_count = 0;
1256         for (i = 0; i < num_of_slaves; i++) {
1257                 struct port *port = &mode_8023ad_ports[slaves[i]];
1258
1259                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1260                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
1261                                 NULL);
1262                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1263
1264                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1265                         slave_bufs[i][j] = slow_pkts[j];
1266
1267                 if (ACTOR_STATE(port, DISTRIBUTING))
1268                         distributing_offsets[distributing_count++] = i;
1269         }
1270
1271         if (likely(distributing_count > 0)) {
1272                 /* Populate slave mbuf arrays with the packets to be sent on each slave */
1273                 for (i = 0; i < nb_pkts; i++) {
1274                         /* Select output slave using hash based on xmit policy */
1275                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1276
1277                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
1278                          * slaves that are currently distributing. */
1279                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
1280                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1281                         slave_nb_pkts[slave_offset]++;
1282                 }
1283         }
1284
1285         /* Send packet burst on each slave device */
1286         for (i = 0; i < num_of_slaves; i++) {
1287                 if (slave_nb_pkts[i] == 0)
1288                         continue;
1289
1290                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1291                                 slave_bufs[i], slave_nb_pkts[i]);
1292
1293                 /* If tx burst fails drop slow packets */
1294                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1295                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1296
1297                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1298                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1299
1300                 /* If tx burst fails move packets to end of bufs */
1301                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1302                         uint16_t j = nb_pkts - num_tx_fail_total;
1303                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1304                                 bufs[j] = slave_bufs[i][num_tx_slave];
1305                 }
1306         }
1307
1308         return num_tx_total;
1309 }
1310
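/*
 * Burst TX for broadcast mode: clone the burst to every active slave by
 * bumping the mbuf reference counts; on partial failure, release the
 * extra references for all slaves except the most successful one.
 */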
1311 static uint16_t
1312 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1313                 uint16_t nb_pkts)
1314 {
1315         struct bond_dev_private *internals;
1316         struct bond_tx_queue *bd_tx_q;
1317
1318         uint8_t tx_failed_flag = 0, num_of_slaves;
1319         uint16_t slaves[RTE_MAX_ETHPORTS];
1320
1321         uint16_t max_nb_of_tx_pkts = 0;
1322
1323         int slave_tx_total[RTE_MAX_ETHPORTS];
1324         int i, most_successful_tx_slave = -1;
1325
1326         bd_tx_q = (struct bond_tx_queue *)queue;
1327         internals = bd_tx_q->dev_private;
1328
1329         /* Copy slave list to protect against slave up/down changes during tx
1330          * bursting */
1331         num_of_slaves = internals->active_slave_count;
1332         memcpy(slaves, internals->active_slaves,
1333                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1334
1335         if (num_of_slaves < 1)
1336                 return 0;
1337
1338         /* Increment reference count on mbufs */
1339         for (i = 0; i < nb_pkts; i++)
1340                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1341
1342         /* Transmit burst on each active slave */
1343         for (i = 0; i < num_of_slaves; i++) {
1344                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1345                                         bufs, nb_pkts);
1346
1347                 if (unlikely(slave_tx_total[i] < nb_pkts))
1348                         tx_failed_flag = 1;
1349
1350                 /* record the value and slave index for the slave which transmits the
1351                  * maximum number of packets */
1352                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1353                         max_nb_of_tx_pkts = slave_tx_total[i];
1354                         most_successful_tx_slave = i;
1355                 }
1356         }
1357
1358         /* if slaves fail to transmit packets from burst, the calling application
1359          * is not expected to know about multiple references to packets so we must
1360          * handle failures of all packets except those of the most successful slave
1361          */
1362         if (unlikely(tx_failed_flag))
1363                 for (i = 0; i < num_of_slaves; i++)
1364                         if (i != most_successful_tx_slave)
1365                                 while (slave_tx_total[i] < nb_pkts)
1366                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1367
1368         return max_nb_of_tx_pkts;
1369 }
1370
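/* Record the link properties the bonded device should report. In mode 4 the
 * first slave's autoneg/duplex/speed are saved as the reference that later
 * slaves must match; in all other modes default autoneg/full-duplex
 * properties are advertised.
 */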
1371 void
1372 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1373 {
1374         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1375
1376         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1377                 /**
1378                  * If in mode 4 then save the link properties of the first
1379                  * slave; all subsequent slaves must match these properties.
1380                  */
1381                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1382
1383                 bond_link->link_autoneg = slave_link->link_autoneg;
1384                 bond_link->link_duplex = slave_link->link_duplex;
1385                 bond_link->link_speed = slave_link->link_speed;
1386         } else {
1387                 /**
1388                  * In any other mode the link properties are set to the
1389                  * default values of autonegotiation and full duplex.
1390                  */
1391                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1392                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1393         }
1394 }
1395
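/* Validate a slave's link properties against the mode 4 reference saved by
 * link_properties_set(); returns 0 on match (or in non-802.3ad modes) and
 * -1 on mismatch.
 */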
1396 int
1397 link_properties_valid(struct rte_eth_dev *ethdev,
1398                 struct rte_eth_link *slave_link)
1399 {
1400         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1401
1402         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1403                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1404
1405                 if (bond_link->link_duplex != slave_link->link_duplex ||
1406                         bond_link->link_autoneg != slave_link->link_autoneg ||
1407                         bond_link->link_speed != slave_link->link_speed)
1408                         return -1;
1409         }
1410
1411         return 0;
1412 }
1413
1414 int
1415 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1416 {
1417         struct ether_addr *mac_addr;
1418
1419         if (eth_dev == NULL) {
1420                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1421                 return -1;
1422         }
1423
1424         if (dst_mac_addr == NULL) {
1425                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1426                 return -1;
1427         }
1428
1429         mac_addr = eth_dev->data->mac_addrs;
1430
1431         ether_addr_copy(mac_addr, dst_mac_addr);
1432         return 0;
1433 }
1434
1435 int
1436 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1437 {
1438         struct ether_addr *mac_addr;
1439
1440         if (eth_dev == NULL) {
1441                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1442                 return -1;
1443         }
1444
1445         if (new_mac_addr == NULL) {
1446                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1447                 return -1;
1448         }
1449
1450         mac_addr = eth_dev->data->mac_addrs;
1451
1452         /* If the new MAC differs from the current MAC then update it */
1453         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1454                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1455
1456         return 0;
1457 }
1458
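/* Propagate MAC addresses to the slaves according to the bonding mode: in
 * round robin, balance and broadcast modes every slave takes the bonded MAC;
 * in 802.3ad mode the update is delegated to the mode 4 code; in the
 * remaining modes only the primary slave takes the bonded MAC while the
 * other slaves keep their persisted addresses.
 */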
1459 int
1460 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1461 {
1462         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1463         int i;
1464
1465         /* Update slave devices MAC addresses */
1466         if (internals->slave_count < 1)
1467                 return -1;
1468
1469         switch (internals->mode) {
1470         case BONDING_MODE_ROUND_ROBIN:
1471         case BONDING_MODE_BALANCE:
1472         case BONDING_MODE_BROADCAST:
1473                 for (i = 0; i < internals->slave_count; i++) {
1474                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1475                                         bonded_eth_dev->data->mac_addrs)) {
1476                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1477                                                 internals->slaves[i].port_id);
1478                                 return -1;
1479                         }
1480                 }
1481                 break;
1482         case BONDING_MODE_8023AD:
1483                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1484                 break;
1485         case BONDING_MODE_ACTIVE_BACKUP:
1486         case BONDING_MODE_TLB:
1487         case BONDING_MODE_ALB:
1488         default:
1489                 for (i = 0; i < internals->slave_count; i++) {
1490                         if (internals->slaves[i].port_id ==
1491                                         internals->current_primary_port) {
1492                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1493                                                 bonded_eth_dev->data->mac_addrs)) {
1494                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1495                                                         internals->current_primary_port);
1496                                         return -1;
1497                                 }
1498                         } else {
1499                                 if (mac_address_set(
1500                                                 &rte_eth_devices[internals->slaves[i].port_id],
1501                                                 &internals->slaves[i].persisted_mac_addr)) {
1502                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1503                                                         internals->slaves[i].port_id);
1504                                         return -1;
1505                                 }
1506                         }
1507                 }
1508         }
1509
1510         return 0;
1511 }
1512
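/* Install the RX/TX burst handlers implementing the requested bonding mode.
 * Modes 4 (802.3ad) and 6 (ALB) also run their enable hooks and may fail, in
 * which case -1 is returned and internals->mode is left unchanged.
 *
 * Illustrative only (assumed public API, not called from this file): an
 * application would normally reach this through
 * rte_eth_bond_mode_set(bonded_port_id, BONDING_MODE_BALANCE).
 */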
1513 int
1514 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1515 {
1516         struct bond_dev_private *internals;
1517
1518         internals = eth_dev->data->dev_private;
1519
1520         switch (mode) {
1521         case BONDING_MODE_ROUND_ROBIN:
1522                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1523                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1524                 break;
1525         case BONDING_MODE_ACTIVE_BACKUP:
1526                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1527                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1528                 break;
1529         case BONDING_MODE_BALANCE:
1530                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1531                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1532                 break;
1533         case BONDING_MODE_BROADCAST:
1534                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1535                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1536                 break;
1537         case BONDING_MODE_8023AD:
1538                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1539                         return -1;
1540
1541                 if (internals->mode4.dedicated_queues.enabled == 0) {
1542                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1543                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1544                         RTE_LOG(WARNING, PMD,
1545                                 "Using mode 4, TX and RX bursts must be "
1546                                 "invoked at least every 100ms.\n");
1547                 } else {
1548                         /* Use flow director's optimization */
1549                         eth_dev->rx_pkt_burst =
1550                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1551                         eth_dev->tx_pkt_burst =
1552                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1553                 }
1554                 break;
1555         case BONDING_MODE_TLB:
1556                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1557                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1558                 break;
1559         case BONDING_MODE_ALB:
1560                 if (bond_mode_alb_enable(eth_dev) != 0)
1561                         return -1;
1562
1563                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1564                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1565                 break;
1566         default:
1567                 return -1;
1568         }
1569
1570         internals->mode = mode;
1571
1572         return 0;
1573 }
1574
1575
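/* Mode 4 slow-queue setup for one slave: lazily create the per-slave mempool
 * used for LACPDU (slow protocol) frames and, when dedicated queues are
 * enabled, configure the extra RX/TX queue pair reserved for them.
 */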
1576 static int
1577 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1578                 struct rte_eth_dev *slave_eth_dev)
1579 {
1580         int errval = 0;
1581         struct bond_dev_private *internals = (struct bond_dev_private *)
1582                 bonded_eth_dev->data->dev_private;
1583         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1584
1585         if (port->slow_pool == NULL) {
1586                 char mem_name[256];
1587                 int slave_id = slave_eth_dev->data->port_id;
1588
1589                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1590                                 slave_id);
1591                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1592                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1593                         slave_eth_dev->data->numa_node);
1594
1595                 /* Any memory allocation failure in initialization is critical because
1596                  * resources cannot be freed, so reinitialization is impossible. */
1597                 if (port->slow_pool == NULL) {
1598                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1599                                 slave_id, mem_name, rte_strerror(rte_errno));
1600                 }
1601         }
1602
1603         if (internals->mode4.dedicated_queues.enabled == 1) {
1604                 /* Configure slow Rx queue */
1605
1606                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1607                                 internals->mode4.dedicated_queues.rx_qid, 128,
1608                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1609                                 NULL, port->slow_pool);
1610                 if (errval != 0) {
1611                         RTE_BOND_LOG(ERR,
1612                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1613                                         slave_eth_dev->data->port_id,
1614                                         internals->mode4.dedicated_queues.rx_qid,
1615                                         errval);
1616                         return errval;
1617                 }
1618
1619                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1620                                 internals->mode4.dedicated_queues.tx_qid, 512,
1621                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1622                                 NULL);
1623                 if (errval != 0) {
1624                         RTE_BOND_LOG(ERR,
1625                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1626                                 slave_eth_dev->data->port_id,
1627                                 internals->mode4.dedicated_queues.tx_qid,
1628                                 errval);
1629                         return errval;
1630                 }
1631         }
1632         return 0;
1633 }
1634
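/* Fully (re)configure a slave to mirror the bonded device: stop it, propagate
 * the LSC interrupt, RSS and VLAN filter settings, set up the same number of
 * RX/TX queues (plus the dedicated mode 4 queues if enabled), restart it,
 * resynchronize the RSS RETA and replay the initial link status event.
 */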
1635 int
1636 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1637                 struct rte_eth_dev *slave_eth_dev)
1638 {
1639         struct bond_rx_queue *bd_rx_q;
1640         struct bond_tx_queue *bd_tx_q;
1641         uint16_t nb_rx_queues;
1642         uint16_t nb_tx_queues;
1643
1644         int errval;
1645         uint16_t q_id;
1646         struct rte_flow_error flow_error;
1647
1648         struct bond_dev_private *internals = (struct bond_dev_private *)
1649                 bonded_eth_dev->data->dev_private;
1650
1651         /* Stop slave */
1652         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1653
1654         /* Enable interrupts on slave device if supported */
1655         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1656                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1657
1658         /* If RSS is enabled for bonding, try to enable it for slaves */
1659         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1660                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1661                                 != 0) {
1662                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1663                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1664                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1665                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1666                 } else {
1667                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1668                 }
1669
1670                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1671                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1672                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1673                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1674         }
1675
1676         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1677                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1678
1679         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1680         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1681
1682         if (internals->mode == BONDING_MODE_8023AD) {
1683                 if (internals->mode4.dedicated_queues.enabled == 1) {
1684                         nb_rx_queues++;
1685                         nb_tx_queues++;
1686                 }
1687         }
1688
1689         /* Configure device */
1690         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1691                         nb_rx_queues, nb_tx_queues,
1692                         &(slave_eth_dev->data->dev_conf));
1693         if (errval != 0) {
1694                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1695                                 slave_eth_dev->data->port_id, errval);
1696                 return errval;
1697         }
1698
1699         /* Setup Rx Queues */
1700         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1701                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1702
1703                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1704                                 bd_rx_q->nb_rx_desc,
1705                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1706                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1707                 if (errval != 0) {
1708                         RTE_BOND_LOG(ERR,
1709                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1710                                         slave_eth_dev->data->port_id, q_id, errval);
1711                         return errval;
1712                 }
1713         }
1714
1715         /* Setup Tx Queues */
1716         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1717                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1718
1719                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1720                                 bd_tx_q->nb_tx_desc,
1721                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1722                                 &bd_tx_q->tx_conf);
1723                 if (errval != 0) {
1724                         RTE_BOND_LOG(ERR,
1725                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1726                                 slave_eth_dev->data->port_id, q_id, errval);
1727                         return errval;
1728                 }
1729         }
1730
1731         if (internals->mode == BONDING_MODE_8023AD &&
1732                         internals->mode4.dedicated_queues.enabled == 1) {
1733                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1734                 if (errval != 0)
1735                         return errval;
1736
1737                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1738                                 slave_eth_dev->data->port_id) != 0) {
1739                         RTE_BOND_LOG(ERR,
1740                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1741                                 slave_eth_dev->data->port_id);
1742                         return -1;
1743                 }
1744
1745                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1746                         rte_flow_destroy(slave_eth_dev->data->port_id,
1747                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1748                                         &flow_error);
1749
1750                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1751                                 slave_eth_dev->data->port_id);
1752         }
1753
1754         /* Start device */
1755         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1756         if (errval != 0) {
1757                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1758                                 slave_eth_dev->data->port_id, errval);
1759                 return -1;
1760         }
1761
1762         /* If RSS is enabled for bonding, synchronize RETA */
1763         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1764                 int i;
1765                 struct bond_dev_private *internals;
1766
1767                 internals = bonded_eth_dev->data->dev_private;
1768
1769                 for (i = 0; i < internals->slave_count; i++) {
1770                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1771                                 errval = rte_eth_dev_rss_reta_update(
1772                                                 slave_eth_dev->data->port_id,
1773                                                 &internals->reta_conf[0],
1774                                                 internals->slaves[i].reta_size);
1775                                 if (errval != 0) {
1776                                         RTE_LOG(WARNING, PMD,
1777                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1778                                                         " RSS Configuration for bonding may be inconsistent.\n",
1779                                                         slave_eth_dev->data->port_id, errval);
1780                                 }
1781                                 break;
1782                         }
1783                 }
1784         }
1785
1786         /* If lsc interrupt is set, check initial slave's link status */
1787         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1788                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1789                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1790                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1791                         NULL);
1792         }
1793
1794         return 0;
1795 }
1796
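/* Remove a slave from the slave table by compacting the array over its slot,
 * then reset its ethdev so it must be reconfigured before it can be reused.
 */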
1797 void
1798 slave_remove(struct bond_dev_private *internals,
1799                 struct rte_eth_dev *slave_eth_dev)
1800 {
1801         uint8_t i;
1802
1803         for (i = 0; i < internals->slave_count; i++)
1804                 if (internals->slaves[i].port_id ==
1805                                 slave_eth_dev->data->port_id)
1806                         break;
1807
1808         if (i < (internals->slave_count - 1))
1809                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1810                                 sizeof(internals->slaves[0]) *
1811                                 (internals->slave_count - i - 1));
1812
1813         internals->slave_count--;
1814
1815         /* force reconfiguration of slave interfaces */
1816         _rte_eth_dev_reset(slave_eth_dev);
1817 }
1818
1819 static void
1820 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1821
1822 void
1823 slave_add(struct bond_dev_private *internals,
1824                 struct rte_eth_dev *slave_eth_dev)
1825 {
1826         struct bond_slave_details *slave_details =
1827                         &internals->slaves[internals->slave_count];
1828
1829         slave_details->port_id = slave_eth_dev->data->port_id;
1830         slave_details->last_link_status = 0;
1831
1832         /* Mark slave devices that don't support interrupts so we can
1833          * compensate when we start the bond
1834          */
1835         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1836                 slave_details->link_status_poll_enabled = 1;
1837         }
1838
1839         slave_details->link_status_wait_to_complete = 0;
1840         /* Save the slave's own MAC so it can be restored later (e.g. on removal) */
1841         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1842                         sizeof(struct ether_addr));
1843 }
1844
1845 void
1846 bond_ethdev_primary_set(struct bond_dev_private *internals,
1847                 uint16_t slave_port_id)
1848 {
1849         int i;
1850
1851         if (internals->active_slave_count < 1)
1852                 internals->current_primary_port = slave_port_id;
1853         else
1854                 /* Search bonded device slave ports for new proposed primary port */
1855                 for (i = 0; i < internals->active_slave_count; i++) {
1856                         if (internals->active_slaves[i] == slave_port_id)
1857                                 internals->current_primary_port = slave_port_id;
1858                 }
1859 }
1860
1861 static void
1862 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1863
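/* dev_start handler for the bonded device: resolve the bonded MAC address,
 * propagate it to the slaves, reconfigure and start every slave, arm the
 * link status polling alarm for slaves without LSC interrupt support and
 * start any mode specific machinery (802.3ad state machines, TLB/ALB).
 */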
1864 static int
1865 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1866 {
1867         struct bond_dev_private *internals;
1868         int i;
1869
1870         /* slave eth dev will be started by bonded device */
1871         if (check_for_bonded_ethdev(eth_dev)) {
1872                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1873                                 eth_dev->data->port_id);
1874                 return -1;
1875         }
1876
1877         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1878         eth_dev->data->dev_started = 1;
1879
1880         internals = eth_dev->data->dev_private;
1881
1882         if (internals->slave_count == 0) {
1883                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1884                 return -1;
1885         }
1886
1887         if (internals->user_defined_mac == 0) {
1888                 struct ether_addr *new_mac_addr = NULL;
1889
1890                 for (i = 0; i < internals->slave_count; i++)
1891                         if (internals->slaves[i].port_id == internals->primary_port)
1892                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1893
1894                 if (new_mac_addr == NULL)
1895                         return -1;
1896
1897                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1898                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1899                                         eth_dev->data->port_id);
1900                         return -1;
1901                 }
1902         }
1903
1904         /* Update all slave devices MACs*/
1905         if (mac_address_slaves_update(eth_dev) != 0)
1906                 return -1;
1907
1908         /* If bonded device is configured in promiscuous mode then re-apply config */
1909         if (internals->promiscuous_en)
1910                 bond_ethdev_promiscuous_enable(eth_dev);
1911
1912         if (internals->mode == BONDING_MODE_8023AD) {
1913                 if (internals->mode4.dedicated_queues.enabled == 1) {
1914                         internals->mode4.dedicated_queues.rx_qid =
1915                                         eth_dev->data->nb_rx_queues;
1916                         internals->mode4.dedicated_queues.tx_qid =
1917                                         eth_dev->data->nb_tx_queues;
1918                 }
1919         }
1920
1921
1922         /* Reconfigure each slave device if starting bonded device */
1923         for (i = 0; i < internals->slave_count; i++) {
1924                 struct rte_eth_dev *slave_ethdev =
1925                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1926                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1927                         RTE_BOND_LOG(ERR,
1928                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1929                                 eth_dev->data->port_id,
1930                                 internals->slaves[i].port_id);
1931                         return -1;
1932                 }
1933                 /* We will need to poll for link status if any slave doesn't
1934                  * support interrupts
1935                  */
1936                 if (internals->slaves[i].link_status_poll_enabled)
1937                         internals->link_status_polling_enabled = 1;
1938         }
1939         /* start polling if needed */
1940         if (internals->link_status_polling_enabled) {
1941                 rte_eal_alarm_set(
1942                         internals->link_status_polling_interval_ms * 1000,
1943                         bond_ethdev_slave_link_status_change_monitor,
1944                         (void *)&rte_eth_devices[internals->port_id]);
1945         }
1946
1947         if (internals->user_defined_primary_port)
1948                 bond_ethdev_primary_set(internals, internals->primary_port);
1949
1950         if (internals->mode == BONDING_MODE_8023AD)
1951                 bond_mode_8023ad_start(eth_dev);
1952
1953         if (internals->mode == BONDING_MODE_TLB ||
1954                         internals->mode == BONDING_MODE_ALB)
1955                 bond_tlb_enable(internals);
1956
1957         return 0;
1958 }
1959
1960 static void
1961 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1962 {
1963         uint8_t i;
1964
1965         if (dev->data->rx_queues != NULL) {
1966                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1967                         rte_free(dev->data->rx_queues[i]);
1968                         dev->data->rx_queues[i] = NULL;
1969                 }
1970                 dev->data->nb_rx_queues = 0;
1971         }
1972
1973         if (dev->data->tx_queues != NULL) {
1974                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1975                         rte_free(dev->data->tx_queues[i]);
1976                         dev->data->tx_queues[i] = NULL;
1977                 }
1978                 dev->data->nb_tx_queues = 0;
1979         }
1980 }
1981
1982 void
1983 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1984 {
1985         struct bond_dev_private *internals = eth_dev->data->dev_private;
1986         uint8_t i;
1987
1988         if (internals->mode == BONDING_MODE_8023AD) {
1989                 struct port *port;
1990                 void *pkt = NULL;
1991
1992                 bond_mode_8023ad_stop(eth_dev);
1993
1994                 /* Discard all messages to/from mode 4 state machines */
1995                 for (i = 0; i < internals->active_slave_count; i++) {
1996                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1997
1998                         RTE_ASSERT(port->rx_ring != NULL);
1999                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2000                                 rte_pktmbuf_free(pkt);
2001
2002                         RTE_ASSERT(port->tx_ring != NULL);
2003                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2004                                 rte_pktmbuf_free(pkt);
2005                 }
2006         }
2007
2008         if (internals->mode == BONDING_MODE_TLB ||
2009                         internals->mode == BONDING_MODE_ALB) {
2010                 bond_tlb_disable(internals);
2011                 for (i = 0; i < internals->active_slave_count; i++)
2012                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2013         }
2014
2015         internals->active_slave_count = 0;
2016         internals->link_status_polling_enabled = 0;
2017         for (i = 0; i < internals->slave_count; i++)
2018                 internals->slaves[i].last_link_status = 0;
2019
2020         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2021         eth_dev->data->dev_started = 0;
2022 }
2023
2024 void
2025 bond_ethdev_close(struct rte_eth_dev *dev)
2026 {
2027         struct bond_dev_private *internals = dev->data->dev_private;
2028         uint16_t bond_port_id = internals->port_id;
2029         int skipped = 0;
2030
2031         RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
2032         while (internals->slave_count != skipped) {
2033                 uint16_t port_id = internals->slaves[skipped].port_id;
2034
2035                 rte_eth_dev_stop(port_id);
2036
2037                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2038                         RTE_LOG(ERR, EAL,
2039                                 "Failed to remove port %d from bonded device "
2040                                 "%s\n", port_id, dev->device->name);
2041                         skipped++;
2042                 }
2043         }
2044         bond_ethdev_free_queues(dev);
2045         rte_bitmap_reset(internals->vlan_filter_bmp);
2046 }
2047
2048 /* forward declaration */
2049 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2050
2051 static void
2052 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2053 {
2054         struct bond_dev_private *internals = dev->data->dev_private;
2055
2056         uint16_t max_nb_rx_queues = UINT16_MAX;
2057         uint16_t max_nb_tx_queues = UINT16_MAX;
2058
2059         dev_info->max_mac_addrs = 1;
2060
2061         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2062                         internals->candidate_max_rx_pktlen :
2063                         ETHER_MAX_JUMBO_FRAME_LEN;
2064
2065         /* The maximum number of tx/rx queues that the bonded device can support
2066          * is the minimum across all bonded slaves, as every slave must be capable
2067          * of supporting the same number of tx/rx queues.
2068          */
2069         if (internals->slave_count > 0) {
2070                 struct rte_eth_dev_info slave_info;
2071                 uint8_t idx;
2072
2073                 for (idx = 0; idx < internals->slave_count; idx++) {
2074                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2075                                         &slave_info);
2076
2077                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2078                                 max_nb_rx_queues = slave_info.max_rx_queues;
2079
2080                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2081                                 max_nb_tx_queues = slave_info.max_tx_queues;
2082                 }
2083         }
2084
2085         dev_info->max_rx_queues = max_nb_rx_queues;
2086         dev_info->max_tx_queues = max_nb_tx_queues;
2087
2088         /**
2089          * If dedicated hw queues are enabled for the link bonding device in
2090          * LACP mode then the maximum number of data path queues is reduced by 1.
2091          */
2092         if (internals->mode == BONDING_MODE_8023AD &&
2093                 internals->mode4.dedicated_queues.enabled == 1) {
2094                 dev_info->max_rx_queues--;
2095                 dev_info->max_tx_queues--;
2096         }
2097
2098         dev_info->min_rx_bufsize = 0;
2099
2100         dev_info->rx_offload_capa = internals->rx_offload_capa;
2101         dev_info->tx_offload_capa = internals->tx_offload_capa;
2102         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2103
2104         dev_info->reta_size = internals->reta_size;
2105 }
2106
2107 static int
2108 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2109 {
2110         int res;
2111         uint16_t i;
2112         struct bond_dev_private *internals = dev->data->dev_private;
2113
2114         /* don't do this while a slave is being added */
2115         rte_spinlock_lock(&internals->lock);
2116
2117         if (on)
2118                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2119         else
2120                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2121
2122         for (i = 0; i < internals->slave_count; i++) {
2123                 uint16_t port_id = internals->slaves[i].port_id;
2124
2125                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2126                 if (res == ENOTSUP)
2127                         RTE_LOG(WARNING, PMD,
2128                                 "Setting VLAN filter on slave port %u not supported.\n",
2129                                 port_id);
2130         }
2131
2132         rte_spinlock_unlock(&internals->lock);
2133         return 0;
2134 }
2135
2136 static int
2137 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2138                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2139                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2140 {
2141         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2142                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2143                                         0, dev->data->numa_node);
2144         if (bd_rx_q == NULL)
2145                 return -1;
2146
2147         bd_rx_q->queue_id = rx_queue_id;
2148         bd_rx_q->dev_private = dev->data->dev_private;
2149
2150         bd_rx_q->nb_rx_desc = nb_rx_desc;
2151
2152         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2153         bd_rx_q->mb_pool = mb_pool;
2154
2155         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2156
2157         return 0;
2158 }
2159
2160 static int
2161 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2162                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2163                 const struct rte_eth_txconf *tx_conf)
2164 {
2165         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2166                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2167                                         0, dev->data->numa_node);
2168
2169         if (bd_tx_q == NULL)
2170                 return -1;
2171
2172         bd_tx_q->queue_id = tx_queue_id;
2173         bd_tx_q->dev_private = dev->data->dev_private;
2174
2175         bd_tx_q->nb_tx_desc = nb_tx_desc;
2176         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2177
2178         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2179
2180         return 0;
2181 }
2182
2183 static void
2184 bond_ethdev_rx_queue_release(void *queue)
2185 {
2186         if (queue == NULL)
2187                 return;
2188
2189         rte_free(queue);
2190 }
2191
2192 static void
2193 bond_ethdev_tx_queue_release(void *queue)
2194 {
2195         if (queue == NULL)
2196                 return;
2197
2198         rte_free(queue);
2199 }
2200
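/* Alarm callback that emulates LSC interrupts for slaves unable to raise
 * them: poll each flagged slave's link status, feed any change into
 * bond_ethdev_lsc_event_callback() and re-arm the alarm while at least one
 * polled slave remains.
 */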
2201 static void
2202 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2203 {
2204         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2205         struct bond_dev_private *internals;
2206
2207         /* Default value for polling slave found is true as we don't want to
2208          * disable the polling thread if we cannot get the lock */
2209         int i, polling_slave_found = 1;
2210
2211         if (cb_arg == NULL)
2212                 return;
2213
2214         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2215         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2216
2217         if (!bonded_ethdev->data->dev_started ||
2218                 !internals->link_status_polling_enabled)
2219                 return;
2220
2221         /* If the device is currently being configured then don't check the
2222          * slaves' link status; wait until the next period */
2223         if (rte_spinlock_trylock(&internals->lock)) {
2224                 if (internals->slave_count > 0)
2225                         polling_slave_found = 0;
2226
2227                 for (i = 0; i < internals->slave_count; i++) {
2228                         if (!internals->slaves[i].link_status_poll_enabled)
2229                                 continue;
2230
2231                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2232                         polling_slave_found = 1;
2233
2234                         /* Update slave link status */
2235                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2236                                         internals->slaves[i].link_status_wait_to_complete);
2237
2238                         /* if link status has changed since last checked then call lsc
2239                          * event callback */
2240                         if (slave_ethdev->data->dev_link.link_status !=
2241                                         internals->slaves[i].last_link_status) {
2242                                 internals->slaves[i].last_link_status =
2243                                                 slave_ethdev->data->dev_link.link_status;
2244
2245                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2246                                                 RTE_ETH_EVENT_INTR_LSC,
2247                                                 &bonded_ethdev->data->port_id,
2248                                                 NULL);
2249                         }
2250                 }
2251                 rte_spinlock_unlock(&internals->lock);
2252         }
2253
2254         if (polling_slave_found)
2255                 /* Set alarm to continue monitoring link status of slave ethdev's */
2256                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2257                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2258 }
2259
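/* Aggregate the bonded link from the active slaves. The reported speed is
 * mode dependent: the minimum slave speed in broadcast mode, the primary
 * slave's speed in active backup mode and the sum of all active slave speeds
 * otherwise; mode 4 additionally inherits the saved duplex/autoneg flags.
 */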
2260 static int
2261 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2262 {
2263         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2264
2265         struct bond_dev_private *bond_ctx;
2266         struct rte_eth_link slave_link;
2267
2268         uint32_t idx;
2269
2270         bond_ctx = ethdev->data->dev_private;
2271
2272         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2273
2274         if (ethdev->data->dev_started == 0 ||
2275                         bond_ctx->active_slave_count == 0) {
2276                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2277                 return 0;
2278         }
2279
2280         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2281
2282         if (wait_to_complete)
2283                 link_update = rte_eth_link_get;
2284         else
2285                 link_update = rte_eth_link_get_nowait;
2286
2287         switch (bond_ctx->mode) {
2288         case BONDING_MODE_BROADCAST:
2289                 /**
2290                  * Setting link speed to UINT32_MAX to ensure we pick up the
2291                  * value of the first active slave
2292                  */
2293                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2294
2295                 /**
2296                  * The bonded link speed is the minimum of all the slaves' link
2297                  * speeds, as packet loss would occur on the slowest slave if
2298                  * transmission at a greater rate were attempted.
2299                  */
2300                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2301                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2302
2303                         if (slave_link.link_speed <
2304                                         ethdev->data->dev_link.link_speed)
2305                                 ethdev->data->dev_link.link_speed =
2306                                                 slave_link.link_speed;
2307                 }
2308                 break;
2309         case BONDING_MODE_ACTIVE_BACKUP:
2310                 /* Current primary slave */
2311                 link_update(bond_ctx->current_primary_port, &slave_link);
2312
2313                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2314                 break;
2315         case BONDING_MODE_8023AD:
2316                 ethdev->data->dev_link.link_autoneg =
2317                                 bond_ctx->mode4.slave_link.link_autoneg;
2318                 ethdev->data->dev_link.link_duplex =
2319                                 bond_ctx->mode4.slave_link.link_duplex;
2320                 /* fall through to update link speed */
2321         case BONDING_MODE_ROUND_ROBIN:
2322         case BONDING_MODE_BALANCE:
2323         case BONDING_MODE_TLB:
2324         case BONDING_MODE_ALB:
2325         default:
2326                 /**
2327                  * In these modes the maximum theoretical link speed is the
2328                  * sum of all the slaves' link speeds.
2329                  */
2330                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2331
2332                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2333                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2334
2335                         ethdev->data->dev_link.link_speed +=
2336                                         slave_link.link_speed;
2337                 }
2338         }
2339
2340
2341         return 0;
2342 }
2343
2344
2345 static int
2346 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2347 {
2348         struct bond_dev_private *internals = dev->data->dev_private;
2349         struct rte_eth_stats slave_stats;
2350         int i, j;
2351
2352         for (i = 0; i < internals->slave_count; i++) {
2353                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2354
2355                 stats->ipackets += slave_stats.ipackets;
2356                 stats->opackets += slave_stats.opackets;
2357                 stats->ibytes += slave_stats.ibytes;
2358                 stats->obytes += slave_stats.obytes;
2359                 stats->imissed += slave_stats.imissed;
2360                 stats->ierrors += slave_stats.ierrors;
2361                 stats->oerrors += slave_stats.oerrors;
2362                 stats->rx_nombuf += slave_stats.rx_nombuf;
2363
2364                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2365                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2366                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2367                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2368                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2369                         stats->q_errors[j] += slave_stats.q_errors[j];
2370                 }
2371
2372         }
2373
2374         return 0;
2375 }
2376
2377 static void
2378 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2379 {
2380         struct bond_dev_private *internals = dev->data->dev_private;
2381         int i;
2382
2383         for (i = 0; i < internals->slave_count; i++)
2384                 rte_eth_stats_reset(internals->slaves[i].port_id);
2385 }
2386
2387 static void
2388 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2389 {
2390         struct bond_dev_private *internals = eth_dev->data->dev_private;
2391         int i;
2392
2393         internals->promiscuous_en = 1;
2394
2395         switch (internals->mode) {
2396         /* Promiscuous mode is propagated to all slaves */
2397         case BONDING_MODE_ROUND_ROBIN:
2398         case BONDING_MODE_BALANCE:
2399         case BONDING_MODE_BROADCAST:
2400                 for (i = 0; i < internals->slave_count; i++)
2401                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2402                 break;
2403         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2404         case BONDING_MODE_8023AD:
2405                 break;
2406         /* Promiscuous mode is propagated only to primary slave */
2407         case BONDING_MODE_ACTIVE_BACKUP:
2408         case BONDING_MODE_TLB:
2409         case BONDING_MODE_ALB:
2410         default:
2411                 rte_eth_promiscuous_enable(internals->current_primary_port);
2412         }
2413 }
2414
2415 static void
2416 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2417 {
2418         struct bond_dev_private *internals = dev->data->dev_private;
2419         int i;
2420
2421         internals->promiscuous_en = 0;
2422
2423         switch (internals->mode) {
2424         /* Promiscuous mode is propagated to all slaves */
2425         case BONDING_MODE_ROUND_ROBIN:
2426         case BONDING_MODE_BALANCE:
2427         case BONDING_MODE_BROADCAST:
2428                 for (i = 0; i < internals->slave_count; i++)
2429                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2430                 break;
2431         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2432         case BONDING_MODE_8023AD:
2433                 break;
2434         /* Promiscuous mode is propagated only to primary slave */
2435         case BONDING_MODE_ACTIVE_BACKUP:
2436         case BONDING_MODE_TLB:
2437         case BONDING_MODE_ALB:
2438         default:
2439                 rte_eth_promiscuous_disable(internals->current_primary_port);
2440         }
2441 }
2442
2443 static void
2444 bond_ethdev_delayed_lsc_propagation(void *arg)
2445 {
2446         if (arg == NULL)
2447                 return;
2448
2449         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2450                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2451 }
2452
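/* LSC handler run on behalf of the bonded device whenever a slave reports a
 * link change: activate or deactivate the slave, elect a new primary port if
 * needed, refresh the bonded link properties and propagate the event to the
 * application, honouring any configured link up/down delay via the delayed
 * propagation alarm.
 */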
2453 int
2454 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2455                 void *param, void *ret_param __rte_unused)
2456 {
2457         struct rte_eth_dev *bonded_eth_dev;
2458         struct bond_dev_private *internals;
2459         struct rte_eth_link link;
2460         int rc = -1;
2461
2462         int i, valid_slave = 0;
2463         uint8_t active_pos;
2464         uint8_t lsc_flag = 0;
2465
2466         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2467                 return rc;
2468
2469         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2470
2471         if (check_for_bonded_ethdev(bonded_eth_dev))
2472                 return rc;
2473
2474         internals = bonded_eth_dev->data->dev_private;
2475
2476         /* If the device isn't started don't handle interrupts */
2477         if (!bonded_eth_dev->data->dev_started)
2478                 return rc;
2479
2480         /* verify that port_id is a valid slave of bonded port */
2481         for (i = 0; i < internals->slave_count; i++) {
2482                 if (internals->slaves[i].port_id == port_id) {
2483                         valid_slave = 1;
2484                         break;
2485                 }
2486         }
2487
2488         if (!valid_slave)
2489                 return rc;
2490
2491         /* Search for port in active port list */
2492         active_pos = find_slave_by_id(internals->active_slaves,
2493                         internals->active_slave_count, port_id);
2494
2495         rte_eth_link_get_nowait(port_id, &link);
2496         if (link.link_status) {
2497                 if (active_pos < internals->active_slave_count)
2498                         return rc;
2499
2500                 /* if no active slave ports then set this port to be primary port */
2501                 if (internals->active_slave_count < 1) {
2502                         /* If first active slave, then change link status */
2503                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2504                         internals->current_primary_port = port_id;
2505                         lsc_flag = 1;
2506
2507                         mac_address_slaves_update(bonded_eth_dev);
2508                 }
2509
2510                 activate_slave(bonded_eth_dev, port_id);
2511
2512                 /* If user has defined the primary port then default to using it */
2513                 if (internals->user_defined_primary_port &&
2514                                 internals->primary_port == port_id)
2515                         bond_ethdev_primary_set(internals, port_id);
2516         } else {
2517                 if (active_pos == internals->active_slave_count)
2518                         return rc;
2519
2520                 /* Remove from active slave list */
2521                 deactivate_slave(bonded_eth_dev, port_id);
2522
2523                 if (internals->active_slave_count < 1)
2524                         lsc_flag = 1;
2525
2526                 /* Update primary id: take the first active slave from the list, or
2527                  * fall back to the configured primary port if none is active */
2528                 if (port_id == internals->current_primary_port) {
2529                         if (internals->active_slave_count > 0)
2530                                 bond_ethdev_primary_set(internals,
2531                                                 internals->active_slaves[0]);
2532                         else
2533                                 internals->current_primary_port = internals->primary_port;
2534                 }
2535         }
2536
2537         /**
2538          * Update bonded device link properties after any change to active
2539          * slaves
2540          */
2541         bond_ethdev_link_update(bonded_eth_dev, 0);
2542
2543         if (lsc_flag) {
2544                 /* Cancel any possible outstanding interrupts if delays are enabled */
2545                 if (internals->link_up_delay_ms > 0 ||
2546                         internals->link_down_delay_ms > 0)
2547                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2548                                         bonded_eth_dev);
2549
2550                 if (bonded_eth_dev->data->dev_link.link_status) {
2551                         if (internals->link_up_delay_ms > 0)
2552                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2553                                                 bond_ethdev_delayed_lsc_propagation,
2554                                                 (void *)bonded_eth_dev);
2555                         else
2556                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2557                                                 RTE_ETH_EVENT_INTR_LSC,
2558                                                 NULL, NULL);
2559
2560                 } else {
2561                         if (internals->link_down_delay_ms > 0)
2562                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2563                                                 bond_ethdev_delayed_lsc_propagation,
2564                                                 (void *)bonded_eth_dev);
2565                         else
2566                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2567                                                 RTE_ETH_EVENT_INTR_LSC,
2568                                                 NULL, NULL);
2569                 }
2570         }
2571         return 0;
2572 }
2573
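/* Apply a new RSS redirection table: copy the user supplied entries into the
 * bonded device's RETA, replicate them to fill the rest of the table, then
 * push the result to every slave using that slave's own RETA size.
 */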
2574 static int
2575 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2576                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2577 {
2578         unsigned i, j;
2579         int result = 0;
2580         int slave_reta_size;
2581         unsigned reta_count;
2582         struct bond_dev_private *internals = dev->data->dev_private;
2583
2584         if (reta_size != internals->reta_size)
2585                 return -EINVAL;
2586
2587          /* Copy RETA table */
2588         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2589
2590         for (i = 0; i < reta_count; i++) {
2591                 internals->reta_conf[i].mask = reta_conf[i].mask;
2592                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2593                         if ((reta_conf[i].mask >> j) & 0x01)
2594                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2595         }
2596
2597         /* Fill rest of array */
2598         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2599                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2600                                 sizeof(internals->reta_conf[0]) * reta_count);
2601
2602         /* Propagate RETA over slaves */
2603         for (i = 0; i < internals->slave_count; i++) {
2604                 slave_reta_size = internals->slaves[i].reta_size;
2605                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2606                                 &internals->reta_conf[0], slave_reta_size);
2607                 if (result < 0)
2608                         return result;
2609         }
2610
2611         return 0;
2612 }
2613
2614 static int
2615 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2616                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2617 {
2618         int i, j;
2619         struct bond_dev_private *internals = dev->data->dev_private;
2620
2621         if (reta_size != internals->reta_size)
2622                 return -EINVAL;
2623
2624          /* Copy RETA table */
2625         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2626                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2627                         if ((reta_conf[i].mask >> j) & 0x01)
2628                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2629
2630         return 0;
2631 }
2632
2633 static int
2634 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2635                 struct rte_eth_rss_conf *rss_conf)
2636 {
2637         int i, result = 0;
2638         struct bond_dev_private *internals = dev->data->dev_private;
2639         struct rte_eth_rss_conf bond_rss_conf;
2640
2641         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2642
2643         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2644
2645         if (bond_rss_conf.rss_hf != 0)
2646                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2647
2648         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2649                         sizeof(internals->rss_key)) {
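                /* A key length of 0 selects the default 40-byte RSS hash key */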
2650                 if (bond_rss_conf.rss_key_len == 0)
2651                         bond_rss_conf.rss_key_len = 40;
2652                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2653                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2654                                 internals->rss_key_len);
2655         }
2656
2657         for (i = 0; i < internals->slave_count; i++) {
2658                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2659                                 &bond_rss_conf);
2660                 if (result < 0)
2661                         return result;
2662         }
2663
2664         return 0;
2665 }
2666
2667 static int
2668 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2669                 struct rte_eth_rss_conf *rss_conf)
2670 {
2671         struct bond_dev_private *internals = dev->data->dev_private;
2672
2673         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2674         rss_conf->rss_key_len = internals->rss_key_len;
2675         if (rss_conf->rss_key)
2676                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2677
2678         return 0;
2679 }
2680
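/*
 * Editor's usage sketch (not built, see #if 0): driving
 * bond_ethdev_rss_hash_update() and bond_ethdev_rss_hash_conf_get()
 * above through the public ethdev API. The 40-byte all-zero key and
 * the hash-function selection are placeholders, not recommended
 * values.
 */
#if 0
static int
example_set_rss_hash(uint16_t bond_port_id)
{
	static uint8_t key[40];	/* placeholder key material */
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = key,
		.rss_key_len = sizeof(key),
		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
	};
	int ret;

	/* rss_hf is masked to what every slave supports */
	ret = rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf);
	if (ret < 0)
		return ret;

	/* Read back the configuration actually applied */
	return rte_eth_dev_rss_hash_conf_get(bond_port_id, &rss_conf);
}
#endif
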
2681 const struct eth_dev_ops default_dev_ops = {
2682         .dev_start            = bond_ethdev_start,
2683         .dev_stop             = bond_ethdev_stop,
2684         .dev_close            = bond_ethdev_close,
2685         .dev_configure        = bond_ethdev_configure,
2686         .dev_infos_get        = bond_ethdev_info,
2687         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2688         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2689         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2690         .rx_queue_release     = bond_ethdev_rx_queue_release,
2691         .tx_queue_release     = bond_ethdev_tx_queue_release,
2692         .link_update          = bond_ethdev_link_update,
2693         .stats_get            = bond_ethdev_stats_get,
2694         .stats_reset          = bond_ethdev_stats_reset,
2695         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2696         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2697         .reta_update          = bond_ethdev_rss_reta_update,
2698         .reta_query           = bond_ethdev_rss_reta_query,
2699         .rss_hash_update      = bond_ethdev_rss_hash_update,
2700         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2701 };
2702
2703 static int
2704 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2705 {
2706         const char *name = rte_vdev_device_name(dev);
2707         uint8_t socket_id = dev->device.numa_node;
2708         struct bond_dev_private *internals = NULL;
2709         struct rte_eth_dev *eth_dev = NULL;
2710         uint32_t vlan_filter_bmp_size;
2711
2712         /* now do all data allocation - for eth_dev structure, dummy pci driver
2713          * and internal (private) data
2714          */
2715
2716         /* reserve an ethdev entry */
2717         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2718         if (eth_dev == NULL) {
2719                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2720                 goto err;
2721         }
2722
2723         internals = eth_dev->data->dev_private;
2724         eth_dev->data->nb_rx_queues = (uint16_t)1;
2725         eth_dev->data->nb_tx_queues = (uint16_t)1;
2726
2727         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2728                         socket_id);
2729         if (eth_dev->data->mac_addrs == NULL) {
2730                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2731                 goto err;
2732         }
2733
2734         eth_dev->dev_ops = &default_dev_ops;
2735         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2736
2737         rte_spinlock_init(&internals->lock);
2738
2739         internals->port_id = eth_dev->data->port_id;
2740         internals->mode = BONDING_MODE_INVALID;
2741         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2742         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2743         internals->xmit_hash = xmit_l2_hash;
2744         internals->user_defined_mac = 0;
2745
2746         internals->link_status_polling_enabled = 0;
2747
2748         internals->link_status_polling_interval_ms =
2749                 DEFAULT_POLLING_INTERVAL_10_MS;
2750         internals->link_down_delay_ms = 0;
2751         internals->link_up_delay_ms = 0;
2752
2753         internals->slave_count = 0;
2754         internals->active_slave_count = 0;
2755         internals->rx_offload_capa = 0;
2756         internals->tx_offload_capa = 0;
2757         internals->candidate_max_rx_pktlen = 0;
2758         internals->max_rx_pktlen = 0;
2759
2760         /* Initially allow choosing any RSS offload type */
2761         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2762
2763         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2764         memset(internals->slaves, 0, sizeof(internals->slaves));
2765
2766         /* Set mode 4 default configuration */
2767         bond_mode_8023ad_setup(eth_dev, NULL);
2768         if (bond_ethdev_mode_set(eth_dev, mode)) {
2769                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2770                                  eth_dev->data->port_id, mode);
2771                 goto err;
2772         }
2773
2774         vlan_filter_bmp_size =
2775                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2776         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2777                                                    RTE_CACHE_LINE_SIZE);
2778         if (internals->vlan_filter_bmpmem == NULL) {
2779                 RTE_BOND_LOG(ERR,
2780                              "Failed to allocate vlan bitmap for bonded device %u",
2781                              eth_dev->data->port_id);
2782                 goto err;
2783         }
2784
2785         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2786                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2787         if (internals->vlan_filter_bmp == NULL) {
2788                 RTE_BOND_LOG(ERR,
2789                              "Failed to init vlan bitmap for bonded device %u",
2790                              eth_dev->data->port_id);
2791                 rte_free(internals->vlan_filter_bmpmem);
2792                 goto err;
2793         }
2794
2795         return eth_dev->data->port_id;
2796
2797 err:
2798         rte_free(internals);
2799         if (eth_dev != NULL) {
2800                 rte_free(eth_dev->data->mac_addrs);
2801                 rte_eth_dev_release_port(eth_dev);
2802         }
2803         return -1;
2804 }
2805
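/*
 * Editor's usage sketch (not built): bond_alloc() above is reached at
 * run time either from bond_probe() below or through the public API,
 * e.g. rte_eth_bond_create(). Device name, mode and slave port ids
 * here are illustrative assumptions.
 */
#if 0
static int
example_create_bond(void)
{
	int bond_port;

	bond_port = rte_eth_bond_create("net_bonding0",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
	if (bond_port < 0)
		return bond_port;

	/* Attach two already-probed ports as slaves */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
			rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	return rte_eth_bond_primary_set(bond_port, 0);
}
#endif
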
2806 static int
2807 bond_probe(struct rte_vdev_device *dev)
2808 {
2809         const char *name;
2810         struct bond_dev_private *internals;
2811         struct rte_kvargs *kvlist;
2812         uint8_t bonding_mode, socket_id;
2813         int  arg_count, port_id;
2814         uint8_t agg_mode;
2815
2816         if (!dev)
2817                 return -EINVAL;
2818
2819         name = rte_vdev_device_name(dev);
2820         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2821
2822         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2823                 pmd_bond_init_valid_arguments);
2824         if (kvlist == NULL)
2825                 return -1;
2826
2827         /* Parse link bonding mode */
2828         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2829                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2830                                 &bond_ethdev_parse_slave_mode_kvarg,
2831                                 &bonding_mode) != 0) {
2832                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2833                                         name);
2834                         goto parse_error;
2835                 }
2836         } else {
2837                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2838                                 "device %s\n", name);
2839                 goto parse_error;
2840         }
2841
2842         /* Parse socket id to create bonding device on */
2843         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2844         if (arg_count == 1) {
2845                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2846                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2847                                 != 0) {
2848                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2849                                         "bonded device %s\n", name);
2850                         goto parse_error;
2851                 }
2852         } else if (arg_count > 1) {
2853                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2854                                 "bonded device %s\n", name);
2855                 goto parse_error;
2856         } else {
2857                 socket_id = rte_socket_id();
2858         }
2859
2860         dev->device.numa_node = socket_id;
2861
2862         /* Create link bonding eth device */
2863         port_id = bond_alloc(dev, bonding_mode);
2864         if (port_id < 0) {
2865                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u "
2866                                 "on socket %u.\n", name, bonding_mode, socket_id);
2867                 goto parse_error;
2868         }
2869         internals = rte_eth_devices[port_id].data->dev_private;
2870         internals->kvlist = kvlist;
2871
2873         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
2874                 if (rte_kvargs_process(kvlist,
2875                                 PMD_BOND_AGG_MODE_KVARG,
2876                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
2877                                 &agg_mode) != 0) {
2878                         RTE_LOG(ERR, EAL,
2879                                         "Failed to parse agg selection mode for bonded device %s\n",
2880                                         name);
2881                         goto parse_error;
2882                 }
2883
2884                 if (internals->mode == BONDING_MODE_8023AD)
2885                         rte_eth_bond_8023ad_agg_selection_set(port_id,
2886                                         agg_mode);
2887         } else {
2888                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
2889         }
2890
2891         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2892                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2893         return 0;
2894
2895 parse_error:
2896         rte_kvargs_free(kvlist);
2897
2898         return -1;
2899 }
2900
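/*
 * Editor's usage sketch (not built): bond_probe()/bond_remove() can
 * also be exercised from application code by hot-plugging the vdev;
 * the argument string follows the parameter list registered at the
 * bottom of this file. The device name and slave PCI addresses are
 * illustrative placeholders.
 */
#if 0
static int
example_hotplug_bond(void)
{
	int ret;

	/* Equivalent to --vdev 'net_bonding0,mode=1,slave=...' */
	ret = rte_vdev_init("net_bonding0",
			"mode=1,slave=0000:01:00.0,slave=0000:02:00.0");
	if (ret != 0)
		return ret;

	/* ... use the bonded port, then tear it down ... */
	return rte_vdev_uninit("net_bonding0");
}
#endif
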
2901 static int
2902 bond_remove(struct rte_vdev_device *dev)
2903 {
2904         struct rte_eth_dev *eth_dev;
2905         struct bond_dev_private *internals;
2906         const char *name;
2907
2908         if (!dev)
2909                 return -EINVAL;
2910
2911         name = rte_vdev_device_name(dev);
2912         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2913
2914         /* now free all data allocation - for eth_dev structure,
2915          * dummy pci driver and internal (private) data
2916          */
2917
2918         /* find an ethdev entry */
2919         eth_dev = rte_eth_dev_allocated(name);
2920         if (eth_dev == NULL)
2921                 return -ENODEV;
2922
2923         RTE_ASSERT(eth_dev->device == &dev->device);
2924
2925         internals = eth_dev->data->dev_private;
2926         if (internals->slave_count != 0)
2927                 return -EBUSY;
2928
2929         if (eth_dev->data->dev_started == 1) {
2930                 bond_ethdev_stop(eth_dev);
2931                 bond_ethdev_close(eth_dev);
2932         }
2933
2934         eth_dev->dev_ops = NULL;
2935         eth_dev->rx_pkt_burst = NULL;
2936         eth_dev->tx_pkt_burst = NULL;
2937
2938         internals = eth_dev->data->dev_private;
2939         rte_bitmap_free(internals->vlan_filter_bmp);
2940         rte_free(internals->vlan_filter_bmpmem);
2941         rte_free(eth_dev->data->dev_private);
2942         rte_free(eth_dev->data->mac_addrs);
2943
2944         rte_eth_dev_release_port(eth_dev);
2945
2946         return 0;
2947 }
2948
2949 /* this part resolves the slave port ids after all the other pdevs and vdevs
2950  * have been allocated */
2951 static int
2952 bond_ethdev_configure(struct rte_eth_dev *dev)
2953 {
2954         const char *name = dev->device->name;
2955         struct bond_dev_private *internals = dev->data->dev_private;
2956         struct rte_kvargs *kvlist = internals->kvlist;
2957         int arg_count;
2958         uint16_t port_id = dev - rte_eth_devices;
2959         uint8_t agg_mode;
2960
2961         static const uint8_t default_rss_key[40] = {
2962                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2963                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2964                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2965                 0xBE, 0xAC, 0x01, 0xFA
2966         };
2967
2968         unsigned i, j;
2969
2970         /* If RSS is enabled, fill table and key with default values */
2971         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2972                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2973                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2974                 memcpy(internals->rss_key, default_rss_key, sizeof(default_rss_key));
2975
2976                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2977                         internals->reta_conf[i].mask = ~0LL;
2978                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2979                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2980                 }
2981         }
2982
2983         /* set the max_rx_pktlen */
2984         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2985
2986         /*
2987          * if no kvlist, it means that this bonded device has been created
2988          * through the bonding api.
2989          */
2990         if (!kvlist)
2991                 return 0;
2992
2993         /* Parse MAC address for bonded device */
2994         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2995         if (arg_count == 1) {
2996                 struct ether_addr bond_mac;
2997
2998                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2999                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3000                         RTE_LOG(ERR, EAL, "Invalid MAC address for bonded device %s\n",
3001                                         name);
3002                         return -1;
3003                 }
3004
3005                 /* Set MAC address */
3006                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3007                         RTE_LOG(ERR, EAL,
3008                                         "Failed to set mac address on bonded device %s\n",
3009                                         name);
3010                         return -1;
3011                 }
3012         } else if (arg_count > 1) {
3013                 RTE_LOG(ERR, EAL,
3014                                 "MAC address can be specified only once for bonded device %s\n",
3015                                 name);
3016                 return -1;
3017         }
3018
3019         /* Parse/set balance mode transmit policy */
3020         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3021         if (arg_count == 1) {
3022                 uint8_t xmit_policy;
3023
3024                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3025                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3026                                                 0) {
3027                         RTE_LOG(ERR, EAL,
3028                                         "Invalid xmit policy specified for bonded device %s\n",
3029                                         name);
3030                         return -1;
3031                 }
3032
3033                 /* Set balance mode transmit policy */
3034                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3035                         RTE_LOG(ERR, EAL,
3036                                         "Failed to set balance xmit policy on bonded device %s\n",
3037                                         name);
3038                         return -1;
3039                 }
3040         } else if (arg_count > 1) {
3041                 RTE_LOG(ERR, EAL,
3042                                 "Transmit policy can be specified only once for bonded device"
3043                                 " %s\n", name);
3044                 return -1;
3045         }
3046
3047         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3048                 if (rte_kvargs_process(kvlist,
3049                                 PMD_BOND_AGG_MODE_KVARG,
3050                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3051                                 &agg_mode) != 0) {
3052                         RTE_LOG(ERR, EAL,
3053                                         "Failed to parse agg selection mode for bonded device %s\n",
3054                                         name);
                         return -1; /* agg_mode would otherwise be used uninitialized */
3055                 }
3056                 if (internals->mode == BONDING_MODE_8023AD)
3057                                 rte_eth_bond_8023ad_agg_selection_set(port_id,
3058                                                 agg_mode);
3059         }
3060
3061         /* Parse/add slave ports to bonded device */
3062         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3063                 struct bond_ethdev_slave_ports slave_ports;
3064                 unsigned i;
3065
3066                 memset(&slave_ports, 0, sizeof(slave_ports));
3067
3068                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3069                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3070                         RTE_LOG(ERR, EAL,
3071                                         "Failed to parse slave ports for bonded device %s\n",
3072                                         name);
3073                         return -1;
3074                 }
3075
3076                 for (i = 0; i < slave_ports.slave_count; i++) {
3077                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3078                                 RTE_LOG(ERR, EAL,
3079                                                 "Failed to add port %d as slave to bonded device %s\n",
3080                                                 slave_ports.slaves[i], name);
3081                         }
3082                 }
3083
3084         } else {
3085                 RTE_LOG(ERR, EAL, "No slaves specified for bonded device %s\n", name);
3086                 return -1;
3087         }
3088
3089         /* Parse/set primary slave port id*/
3090         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3091         if (arg_count == 1) {
3092                 uint16_t primary_slave_port_id;
3093
3094                 if (rte_kvargs_process(kvlist,
3095                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3096                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3097                                 &primary_slave_port_id) < 0) {
3098                         RTE_LOG(ERR, EAL,
3099                                         "Invalid primary slave port id specified for bonded device"
3100                                         " %s\n", name);
3101                         return -1;
3102                 }
3103
3104                 /* Set primary slave port */
3105                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3106                                 != 0) {
3107                         RTE_LOG(ERR, EAL,
3108                                         "Failed to set primary slave port %d on bonded device %s\n",
3109                                         primary_slave_port_id, name);
3110                         return -1;
3111                 }
3112         } else if (arg_count > 1) {
3113                 RTE_LOG(ERR, EAL,
3114                                 "Primary slave can be specified only once for bonded device"
3115                                 " %s\n", name);
3116                 return -1;
3117         }
3118
3119         /* Parse link status monitor polling interval */
3120         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3121         if (arg_count == 1) {
3122                 uint32_t lsc_poll_interval_ms;
3123
3124                 if (rte_kvargs_process(kvlist,
3125                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3126                                 &bond_ethdev_parse_time_ms_kvarg,
3127                                 &lsc_poll_interval_ms) < 0) {
3128                         RTE_LOG(ERR, EAL,
3129                                         "Invalid lsc polling interval value specified for bonded"
3130                                         " device %s\n", name);
3131                         return -1;
3132                 }
3133
3134                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3135                                 != 0) {
3136                         RTE_LOG(ERR, EAL,
3137                                         "Failed to set lsc monitor polling interval (%u ms) on"
3138                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3139                         return -1;
3140                 }
3141         } else if (arg_count > 1) {
3142                 RTE_LOG(ERR, EAL,
3143                                 "LSC polling interval can be specified only once for bonded"
3144                                 " device %s\n", name);
3145                 return -1;
3146         }
3147
3148         /* Parse link up interrupt propagation delay */
3149         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3150         if (arg_count == 1) {
3151                 uint32_t link_up_delay_ms;
3152
3153                 if (rte_kvargs_process(kvlist,
3154                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3155                                 &bond_ethdev_parse_time_ms_kvarg,
3156                                 &link_up_delay_ms) < 0) {
3157                         RTE_LOG(ERR, EAL,
3158                                         "Invalid link up propagation delay value specified for"
3159                                         " bonded device %s\n", name);
3160                         return -1;
3161                 }
3162
3163                 /* Set link up propagation delay */
3164                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3165                                 != 0) {
3166                         RTE_LOG(ERR, EAL,
3167                                         "Failed to set link up propagation delay (%u ms) on bonded"
3168                                         " device %s\n", link_up_delay_ms, name);
3169                         return -1;
3170                 }
3171         } else if (arg_count > 1) {
3172                 RTE_LOG(ERR, EAL,
3173                                 "Link up propagation delay can be specified only once for"
3174                                 " bonded device %s\n", name);
3175                 return -1;
3176         }
3177
3178         /* Parse link down interrupt propagation delay */
3179         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3180         if (arg_count == 1) {
3181                 uint32_t link_down_delay_ms;
3182
3183                 if (rte_kvargs_process(kvlist,
3184                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3185                                 &bond_ethdev_parse_time_ms_kvarg,
3186                                 &link_down_delay_ms) < 0) {
3187                         RTE_LOG(ERR, EAL,
3188                                         "Invalid link down propagation delay value specified for"
3189                                         " bonded device %s\n", name);
3190                         return -1;
3191                 }
3192
3193                 /* Set link down propagation delay */
3194                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3195                                 != 0) {
3196                         RTE_LOG(ERR, EAL,
3197                                         "Failed to set link down propagation delay (%u ms) on"
3198                                         " bonded device %s\n", link_down_delay_ms, name);
3199                         return -1;
3200                 }
3201         } else if (arg_count > 1) {
3202                 RTE_LOG(ERR, EAL,
3203                                 "Link down propagation delay can be specified only once for"
3204                                 " bonded device %s\n", name);
3205                 return -1;
3206         }
3207
3208         return 0;
3209 }
3210
3211 struct rte_vdev_driver pmd_bond_drv = {
3212         .probe = bond_probe,
3213         .remove = bond_remove,
3214 };
3215
3216 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3217 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3218
3219 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3220         "slave=<ifc> "
3221         "primary=<ifc> "
3222         "mode=[0-6] "
3223         "xmit_policy=[l2 | l23 | l34] "
3224         "agg_mode=[count | stable | bandwidth] "
3225         "socket_id=<int> "
3226         "mac=<mac addr> "
3227         "lsc_poll_period_ms=<int> "
3228         "up_delay=<int> "
3229         "down_delay=<int>");
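
/*
 * Editor's note: an illustrative testpmd invocation matching the
 * parameter string registered above (core list, channel count and PCI
 * addresses are placeholders):
 *
 *   testpmd -l 0-3 -n 4 --vdev \
 *       'net_bonding0,mode=1,slave=0000:01:00.0,slave=0000:02:00.0,primary=0000:01:00.0,lsc_poll_period_ms=100'
 */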