net/bonding: fix setting slave MAC addresses
drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

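/*
 * Return the offset, in bytes, from the end of the Ethernet header to the
 * L3 header, accounting for up to two stacked VLAN tags (QinQ). Updates
 * *proto to the inner EtherType; comparisons are done in network order.
 */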
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

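/*
 * Default Rx burst (round-robin modes): poll each active slave in turn,
 * appending to bufs until nb_pkts packets have been gathered or every
 * slave has been polled once.
 */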
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

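/*
 * Active-backup Rx burst: packets are only ever received from the current
 * primary slave; the remaining slaves are standbys.
 */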
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

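/*
 * Return non-zero when the frame is an untagged slow-protocol frame
 * (LACPDU or marker PDU) that must be diverted to the mode 4 state
 * machine instead of being handed to the application.
 */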
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

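/*
 * Check that a slave can support the dedicated control queue setup: the
 * flow rule steering ETHER_TYPE_SLOW traffic must validate on the slave,
 * and the slave must be able to provide at least as many Rx/Tx queues as
 * the bonded device is configured with.
 */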
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port)
{
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id)
{
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that every slave in the bonding device can install the
         * slow-packet flow rule and expose the extra queue pair. */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

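/*
 * Install the slow-protocol steering rule on @slave_port so that LACP
 * traffic lands on the dedicated Rx queue instead of the data queues.
 */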
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port)
{
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

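/*
 * Mode 4 Rx burst for the dedicated-queue ("fast") path: slow protocol
 * frames are already steered to a separate queue by rte_flow, so the data
 * path can round-robin across the active slaves without inspecting each
 * packet.
 */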
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

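/*
 * Mode 4 Tx burst for the dedicated-queue ("fast") path: hash each packet
 * onto one of the slaves currently in the DISTRIBUTING state, burst the
 * per-slave buffers, and compact any unsent packets to the tail of bufs.
 */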
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in slaves, not ID */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, op_slave_idx;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        if (unlikely(nb_pkts == 0))
                return 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
                        num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];
                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate the per-slave buffers with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i],
                                        distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave.
                         * Use only slaves that are currently distributing.
                         */
                        uint8_t slave_offset =
                                        distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
                                        bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                num_tx_total += num_tx_slave;
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
                                        num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

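/*
 * Full mode 4 Rx burst (shared queues): in addition to round-robin
 * collection, inspect every packet, pass slow-protocol frames to the
 * 802.3ad state machine, and drop frames that a non-collecting or
 * non-promiscuous port should not deliver.
 */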
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If the packet is known to be more than pure L2 it
                         * cannot be a slow protocol frame, so skip it. */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, or if the slave is not in collecting state,
                         * or if the bonded interface is not in promiscuous
                         * mode and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) &&
                                (client_stats[i].port == port)) {
                        /* Just update the packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the
         * stats, provided the table still has room. */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

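/*
 * ALB (mode 6) Rx burst: receive as in round-robin mode, then snoop ARP
 * packets so the client table driving Tx balancing stays up to date.
 */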
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* Rx queue and private data of the bonded device */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

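/*
 * Round-robin (mode 0) Tx burst: spread packets over the active slaves one
 * at a time, remembering where to start on the next call; packets that
 * fail to send are moved to the end of bufs for the caller to retry.
 */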
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets among the slave buffers, one at a time */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

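/*
 * Active-backup Tx burst: everything goes out through the current primary
 * slave.
 */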
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

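/*
 * XOR-fold helpers over packet header fields; building blocks for the
 * transmit hash policies below.
 */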
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

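/*
 * Transmit hash policies used by the balance and 802.3ad modes. Layer 2
 * hashes source/destination MACs, layer 2+3 additionally folds in the
 * IPv4/IPv6 addresses, and layer 3+4 folds in the L4 ports; for example
 * HASH_L4_PORTS for ports 0x1234 and 0x0050 contributes
 * 0x1234 ^ 0x0050 = 0x1264. The folded hash is reduced modulo slave_count.
 */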
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                /* there is no L4 header in fragmented packet */
                if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
                        ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                        if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals)
{
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

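/*
 * Compute a two-part sort key for a slave from its link bandwidth over the
 * elapsed reorder window and the byte count transmitted in that window.
 * bandwidth_cmp() orders slaves by this key so the TLB scheduler tries the
 * slave with the most spare capacity first.
 */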
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

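/*
 * TLB (mode 5) Tx burst: walk the slaves in the order of spare bandwidth
 * computed by bond_ethdev_update_tlb_slave_cb(), rewrite the source MAC of
 * packets still carrying the primary's address to that of the transmitting
 * slave, and send as much of the burst as each slave will take.
 */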
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                        sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

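/*
 * ALB (mode 6) Tx burst: ARP packets are assigned to slaves via the ALB
 * client table and get their source MAC rewritten; pending ARP updates are
 * generated for known clients when mode6.ntt is set; all remaining traffic
 * is sent with the TLB policy.
 */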
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional for
         * packets sent with the tlb policy. In the worst case every packet
         * is sent on the same port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

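/*
 * Balance (mode 2) Tx burst: hash each packet onto an active slave using
 * the configured xmit policy, burst the per-slave buffers, and compact
 * unsent packets to the tail of bufs.
 */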
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the slave mbuf arrays with the packets to be sent on them */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

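/*
 * Full mode 4 Tx burst (shared queues): distribute data packets across
 * DISTRIBUTING slaves as in balance mode, then drain each slave's control
 * ring, transmitting at most one LACPDU per slave per call and re-queueing
 * it if transmission fails.
 */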
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in slaves, not ID */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, op_slave_idx;

        /* Per-slave transmit buffers */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate the slave mbuf arrays with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        /* Check for LACP control packets and send if available */
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                /* rte_ring_dequeue() returns 0 on success */
                int pkt_avail = rte_ring_dequeue(port->tx_ring,
                                (void **)&ctrl_pkt);

                if (unlikely(pkt_avail == 0)) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);

                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         * Exactly one packet was handed to the burst above.
                         */
                        if (num_tx_slave != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

        return num_tx_total;
}

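/*
 * Broadcast (mode 3) Tx burst: bump every mbuf's reference count and
 * transmit the whole burst on each active slave; on partial failure, free
 * the untransmitted copies on all but the most successful slave, since the
 * caller only learns that slave's count.
 */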
1322 static uint16_t
1323 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1324                 uint16_t nb_pkts)
1325 {
1326         struct bond_dev_private *internals;
1327         struct bond_tx_queue *bd_tx_q;
1328
1329         uint8_t tx_failed_flag = 0, num_of_slaves;
1330         uint16_t slaves[RTE_MAX_ETHPORTS];
1331
1332         uint16_t max_nb_of_tx_pkts = 0;
1333
1334         int slave_tx_total[RTE_MAX_ETHPORTS];
1335         int i, most_successful_tx_slave = -1;
1336
1337         bd_tx_q = (struct bond_tx_queue *)queue;
1338         internals = bd_tx_q->dev_private;
1339
1340         /* Copy slave list to protect against slave up/down changes during tx
1341          * bursting */
1342         num_of_slaves = internals->active_slave_count;
1343         memcpy(slaves, internals->active_slaves,
1344                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1345
1346         if (num_of_slaves < 1)
1347                 return 0;
1348
1349         /* Increment reference count on mbufs */
1350         for (i = 0; i < nb_pkts; i++)
1351                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1352
1353         /* Transmit burst on each active slave */
1354         for (i = 0; i < num_of_slaves; i++) {
1355                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1356                                         bufs, nb_pkts);
1357
1358                 if (unlikely(slave_tx_total[i] < nb_pkts))
1359                         tx_failed_flag = 1;
1360
1361                 /* record the value and slave index for the slave which transmits the
1362                  * maximum number of packets */
1363                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1364                         max_nb_of_tx_pkts = slave_tx_total[i];
1365                         most_successful_tx_slave = i;
1366                 }
1367         }
1368
1369         /* If a slave fails to transmit part of the burst, the calling application
1370          * is unaware of the extra mbuf references taken above, so the bond must
1371          * free the untransmitted packets of every slave except the most
1372          * successful one, whose count is reported back to the caller. */
1373         if (unlikely(tx_failed_flag))
1374                 for (i = 0; i < num_of_slaves; i++)
1375                         if (i != most_successful_tx_slave)
1376                                 while (slave_tx_total[i] < nb_pkts)
1377                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1378
1379         return max_nb_of_tx_pkts;
1380 }
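
/*
 * Illustrative sketch (not part of the driver): broadcast mode relies on
 * mbuf reference counting so the same packet can be handed to every slave.
 * Assuming a mempool 'mp' (hypothetical), the mechanics look like:
 *
 *     struct rte_mbuf *m = rte_pktmbuf_alloc(mp);
 *     rte_mbuf_refcnt_update(m, 2);   // 3 owners: caller + 2 extra slaves
 *     rte_pktmbuf_free(m);            // refcnt 3 -> 2, buffer not released
 *     rte_pktmbuf_free(m);            // refcnt 2 -> 1
 *     rte_pktmbuf_free(m);            // refcnt 1 -> 0, returned to mp
 */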
1381
1382 void
1383 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1384 {
1385         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386
1387         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388                 /**
1389                  * If in mode 4 then save the link properties of the first
1390                  * slave; all subsequent slaves must match these properties
1391                  */
1392                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1393
1394                 bond_link->link_autoneg = slave_link->link_autoneg;
1395                 bond_link->link_duplex = slave_link->link_duplex;
1396                 bond_link->link_speed = slave_link->link_speed;
1397         } else {
1398                 /**
1399                  * In all other modes the bonded device reports default link
1400                  * properties: autonegotiation enabled and full duplex
1401                  */
1402                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1403                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1404         }
1405 }
1406
1407 int
1408 link_properties_valid(struct rte_eth_dev *ethdev,
1409                 struct rte_eth_link *slave_link)
1410 {
1411         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1412
1413         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1414                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1415
1416                 if (bond_link->link_duplex != slave_link->link_duplex ||
1417                         bond_link->link_autoneg != slave_link->link_autoneg ||
1418                         bond_link->link_speed != slave_link->link_speed)
1419                         return -1;
1420         }
1421
1422         return 0;
1423 }
1424
1425 int
1426 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1427 {
1428         struct ether_addr *mac_addr;
1429
1430         if (eth_dev == NULL) {
1431                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1432                 return -1;
1433         }
1434
1435         if (dst_mac_addr == NULL) {
1436                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1437                 return -1;
1438         }
1439
1440         mac_addr = eth_dev->data->mac_addrs;
1441
1442         ether_addr_copy(mac_addr, dst_mac_addr);
1443         return 0;
1444 }
1445
1446 int
1447 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1448 {
1449         struct ether_addr *mac_addr;
1450
1451         if (eth_dev == NULL) {
1452                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1453                 return -1;
1454         }
1455
1456         if (new_mac_addr == NULL) {
1457                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1458                 return -1;
1459         }
1460
1461         mac_addr = eth_dev->data->mac_addrs;
1462
1463         /* If new MAC is different to current MAC then update */
1464         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1465                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1466
1467         return 0;
1468 }
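
/*
 * Illustrative sketch (not part of the driver): an application changes the
 * bonded device's MAC through the public API rather than this internal
 * helper; 'bond_port' and the address bytes are hypothetical:
 *
 *     struct ether_addr addr = {
 *             .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };
 *     rte_eth_bond_mac_address_set(bond_port, &addr);
 */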
1469
1470 int
1471 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1472 {
1473         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1474         int i;
1475
1476         /* Update slave devices MAC addresses */
1477         if (internals->slave_count < 1)
1478                 return -1;
1479
1480         switch (internals->mode) {
1481         case BONDING_MODE_ROUND_ROBIN:
1482         case BONDING_MODE_BALANCE:
1483         case BONDING_MODE_BROADCAST:
1484                 for (i = 0; i < internals->slave_count; i++) {
1485                         if (rte_eth_dev_default_mac_addr_set(
1486                                         internals->slaves[i].port_id,
1487                                         bonded_eth_dev->data->mac_addrs)) {
1488                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1489                                                 internals->slaves[i].port_id);
1490                                 return -1;
1491                         }
1492                 }
1493                 break;
1494         case BONDING_MODE_8023AD:
1495                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1496                 break;
1497         case BONDING_MODE_ACTIVE_BACKUP:
1498         case BONDING_MODE_TLB:
1499         case BONDING_MODE_ALB:
1500         default:
1501                 for (i = 0; i < internals->slave_count; i++) {
1502                         if (internals->slaves[i].port_id ==
1503                                         internals->current_primary_port) {
1504                                 if (rte_eth_dev_default_mac_addr_set(
1505                                                 internals->current_primary_port,
1506                                                 bonded_eth_dev->data->mac_addrs)) {
1507                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1508                                                         internals->current_primary_port);
1509                                         return -1;
1510                                 }
1511                         } else {
1512                                 if (rte_eth_dev_default_mac_addr_set(
1513                                                 internals->slaves[i].port_id,
1514                                                 &internals->slaves[i].persisted_mac_addr)) {
1515                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1516                                                         internals->slaves[i].port_id);
1517                                         return -1;
1518                                 }
1519                         }
1520                 }
1521         }
1522
1523         return 0;
1524 }
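
/*
 * Summary of the policy implemented above: in round robin, balance and
 * broadcast modes every slave carries the bonded MAC; in mode 4 the update
 * is delegated to the 802.3ad state machine; in active backup, TLB and ALB
 * only the current primary carries the bonded MAC while the other slaves
 * keep their persisted hardware addresses.
 */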
1525
1526 int
1527 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1528 {
1529         struct bond_dev_private *internals;
1530
1531         internals = eth_dev->data->dev_private;
1532
1533         switch (mode) {
1534         case BONDING_MODE_ROUND_ROBIN:
1535                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1536                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1537                 break;
1538         case BONDING_MODE_ACTIVE_BACKUP:
1539                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1540                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1541                 break;
1542         case BONDING_MODE_BALANCE:
1543                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1544                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1545                 break;
1546         case BONDING_MODE_BROADCAST:
1547                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1548                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1549                 break;
1550         case BONDING_MODE_8023AD:
1551                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1552                         return -1;
1553
1554                 if (internals->mode4.dedicated_queues.enabled == 0) {
1555                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1556                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1557                         RTE_LOG(WARNING, PMD,
1558                                 "Using mode 4, TX and RX bursts must be "
1559                                 "invoked at least once every 100ms.\n");
1560                 } else {
1561                         /* Use flow director's optimization */
1562                         eth_dev->rx_pkt_burst =
1563                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1564                         eth_dev->tx_pkt_burst =
1565                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1566                 }
1567                 break;
1568         case BONDING_MODE_TLB:
1569                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1570                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1571                 break;
1572         case BONDING_MODE_ALB:
1573                 if (bond_mode_alb_enable(eth_dev) != 0)
1574                         return -1;
1575
1576                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1577                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1578                 break;
1579         default:
1580                 return -1;
1581         }
1582
1583         internals->mode = mode;
1584
1585         return 0;
1586 }
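
/*
 * Illustrative sketch (not part of the driver): the burst handlers above
 * are selected when an application creates a bond and picks a mode;
 * 'slave0'/'slave1' are hypothetical, already-probed port ids:
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *     rte_eth_bond_slave_add(bond_port, slave0);
 *     rte_eth_bond_slave_add(bond_port, slave1);
 *     rte_eth_bond_primary_set(bond_port, slave0);
 */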
1587
1588
1589 static int
1590 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1591                 struct rte_eth_dev *slave_eth_dev)
1592 {
1593         int errval = 0;
1594         struct bond_dev_private *internals = (struct bond_dev_private *)
1595                 bonded_eth_dev->data->dev_private;
1596         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1597
1598         if (port->slow_pool == NULL) {
1599                 char mem_name[256];
1600                 int slave_id = slave_eth_dev->data->port_id;
1601
1602                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1603                                 slave_id);
1604                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1605                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1606                         slave_eth_dev->data->numa_node);
1607
1608                 /* Any memory allocation failure during initialization is critical
1609                  * because resources can't be freed, so reinitialization is impossible. */
1610                 if (port->slow_pool == NULL) {
1611                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1612                                 slave_id, mem_name, rte_strerror(rte_errno));
1613                 }
1614         }
1615
1616         if (internals->mode4.dedicated_queues.enabled == 1) {
1617                 /* Configure slow Rx queue */
1618
1619                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1620                                 internals->mode4.dedicated_queues.rx_qid, 128,
1621                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1622                                 NULL, port->slow_pool);
1623                 if (errval != 0) {
1624                         RTE_BOND_LOG(ERR,
1625                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1626                                         slave_eth_dev->data->port_id,
1627                                         internals->mode4.dedicated_queues.rx_qid,
1628                                         errval);
1629                         return errval;
1630                 }
1631
1632                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1633                                 internals->mode4.dedicated_queues.tx_qid, 512,
1634                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1635                                 NULL);
1636                 if (errval != 0) {
1637                         RTE_BOND_LOG(ERR,
1638                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1639                                 slave_eth_dev->data->port_id,
1640                                 internals->mode4.dedicated_queues.tx_qid,
1641                                 errval);
1642                         return errval;
1643                 }
1644         }
1645         return 0;
1646 }
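
/*
 * Illustrative sketch (not part of the driver): the slow queue above is
 * only set up when dedicated hardware queues are requested before the
 * bond is started; 'bond_port' is hypothetical:
 *
 *     rte_eth_bond_8023ad_dedicated_queues_enable(bond_port);
 *     // ... then configure and start the bonded device as usual ...
 */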
1647
1648 int
1649 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1650                 struct rte_eth_dev *slave_eth_dev)
1651 {
1652         struct bond_rx_queue *bd_rx_q;
1653         struct bond_tx_queue *bd_tx_q;
1654         uint16_t nb_rx_queues;
1655         uint16_t nb_tx_queues;
1656
1657         int errval;
1658         uint16_t q_id;
1659         struct rte_flow_error flow_error;
1660
1661         struct bond_dev_private *internals = (struct bond_dev_private *)
1662                 bonded_eth_dev->data->dev_private;
1663
1664         /* Stop slave */
1665         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1666
1667         /* Enable interrupts on slave device if supported */
1668         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1669                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1670
1671         /* If RSS is enabled for bonding, try to enable it for slaves  */
1672         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1673                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1674                                 != 0) {
1675                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1676                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1677                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1678                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1679                 } else {
1680                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1681                 }
1682
1683                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1684                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1685                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1686                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1687         }
1688
1689         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1690                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1691
1692         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1693         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1694
1695         if (internals->mode == BONDING_MODE_8023AD) {
1696                 if (internals->mode4.dedicated_queues.enabled == 1) {
1697                         nb_rx_queues++;
1698                         nb_tx_queues++;
1699                 }
1700         }
1701
1702         /* Configure device */
1703         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1704                         nb_rx_queues, nb_tx_queues,
1705                         &(slave_eth_dev->data->dev_conf));
1706         if (errval != 0) {
1707                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1708                                 slave_eth_dev->data->port_id, errval);
1709                 return errval;
1710         }
1711
1712         /* Setup Rx Queues */
1713         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1714                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1715
1716                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1717                                 bd_rx_q->nb_rx_desc,
1718                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1719                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1720                 if (errval != 0) {
1721                         RTE_BOND_LOG(ERR,
1722                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1723                                         slave_eth_dev->data->port_id, q_id, errval);
1724                         return errval;
1725                 }
1726         }
1727
1728         /* Setup Tx Queues */
1729         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1730                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1731
1732                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1733                                 bd_tx_q->nb_tx_desc,
1734                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1735                                 &bd_tx_q->tx_conf);
1736                 if (errval != 0) {
1737                         RTE_BOND_LOG(ERR,
1738                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1739                                 slave_eth_dev->data->port_id, q_id, errval);
1740                         return errval;
1741                 }
1742         }
1743
1744         if (internals->mode == BONDING_MODE_8023AD &&
1745                         internals->mode4.dedicated_queues.enabled == 1) {
1746                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1747                 if (errval != 0)
1748                         return errval;
1749
1750                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1751                                 slave_eth_dev->data->port_id) != 0) {
1752                         RTE_BOND_LOG(ERR,
1753                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1754                                 slave_eth_dev->data->port_id);
1755                         return -1;
1756                 }
1757
1758                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1759                         rte_flow_destroy(slave_eth_dev->data->port_id,
1760                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1761                                         &flow_error);
1762
1763                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1764                                 slave_eth_dev->data->port_id);
1765         }
1766
1767         /* Start device */
1768         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1769         if (errval != 0) {
1770                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1771                                 slave_eth_dev->data->port_id, errval);
1772                 return -1;
1773         }
1774
1775         /* If RSS is enabled for bonding, synchronize RETA */
1776         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1777                 int i;
1778                 struct bond_dev_private *internals;
1779
1780                 internals = bonded_eth_dev->data->dev_private;
1781
1782                 for (i = 0; i < internals->slave_count; i++) {
1783                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1784                                 errval = rte_eth_dev_rss_reta_update(
1785                                                 slave_eth_dev->data->port_id,
1786                                                 &internals->reta_conf[0],
1787                                                 internals->slaves[i].reta_size);
1788                                 if (errval != 0) {
1789                                         RTE_LOG(WARNING, PMD,
1790                                                         "rte_eth_dev_rss_reta_update on slave port %d failed (err %d)."
1791                                                         " RSS configuration for bonding may be inconsistent.\n",
1792                                                         slave_eth_dev->data->port_id, errval);
1793                                 }
1794                                 break;
1795                         }
1796                 }
1797         }
1798
1799         /* If lsc interrupt is set, check initial slave's link status */
1800         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1801                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1802                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1803                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1804                         NULL);
1805         }
1806
1807         return 0;
1808 }
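
/*
 * Illustrative sketch (not part of the driver): the RSS inheritance above
 * means it is enough to configure RSS once on the bonded port; slaves are
 * reconfigured to match when the bond starts.  'bond_port', 'nb_rxq' and
 * 'nb_txq' are hypothetical:
 *
 *     struct rte_eth_conf conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,            // per-PMD default key
 *                     .rss_hf = ETH_RSS_IP },
 *     };
 *     rte_eth_dev_configure(bond_port, nb_rxq, nb_txq, &conf);
 */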
1809
1810 void
1811 slave_remove(struct bond_dev_private *internals,
1812                 struct rte_eth_dev *slave_eth_dev)
1813 {
1814         uint8_t i;
1815
1816         for (i = 0; i < internals->slave_count; i++)
1817                 if (internals->slaves[i].port_id ==
1818                                 slave_eth_dev->data->port_id)
1819                         break;
1820
1821         if (i < (internals->slave_count - 1))
1822                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1823                                 sizeof(internals->slaves[0]) *
1824                                 (internals->slave_count - i - 1));
1825
1826         internals->slave_count--;
1827
1828         /* force reconfiguration of slave interfaces */
1829         _rte_eth_dev_reset(slave_eth_dev);
1830 }
1831
1832 static void
1833 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1834
1835 void
1836 slave_add(struct bond_dev_private *internals,
1837                 struct rte_eth_dev *slave_eth_dev)
1838 {
1839         struct bond_slave_details *slave_details =
1840                         &internals->slaves[internals->slave_count];
1841
1842         slave_details->port_id = slave_eth_dev->data->port_id;
1843         slave_details->last_link_status = 0;
1844
1845         /* Mark slave devices that don't support interrupts so we can
1846          * compensate when we start the bond
1847          */
1848         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1849                 slave_details->link_status_poll_enabled = 1;
1850         }
1851
1852         slave_details->link_status_wait_to_complete = 0;
1853         /* save slave's current MAC so it can be restored on removal from bond */
1854         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1855                         sizeof(struct ether_addr));
1856 }
1857
1858 void
1859 bond_ethdev_primary_set(struct bond_dev_private *internals,
1860                 uint16_t slave_port_id)
1861 {
1862         int i;
1863
1864         if (internals->active_slave_count < 1)
1865                 internals->current_primary_port = slave_port_id;
1866         else
1867                 /* Search bonded device slave ports for new proposed primary port */
1868                 for (i = 0; i < internals->active_slave_count; i++) {
1869                         if (internals->active_slaves[i] == slave_port_id)
1870                                 internals->current_primary_port = slave_port_id;
1871                 }
1872 }
1873
1874 static void
1875 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1876
1877 static int
1878 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1879 {
1880         struct bond_dev_private *internals;
1881         int i;
1882
1883         /* slave eth dev will be started by bonded device */
1884         if (check_for_bonded_ethdev(eth_dev)) {
1885                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1886                                 eth_dev->data->port_id);
1887                 return -1;
1888         }
1889
1890         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1891         eth_dev->data->dev_started = 1;
1892
1893         internals = eth_dev->data->dev_private;
1894
1895         if (internals->slave_count == 0) {
1896                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1897                 return -1;
1898         }
1899
1900         if (internals->user_defined_mac == 0) {
1901                 struct ether_addr *new_mac_addr = NULL;
1902
1903                 for (i = 0; i < internals->slave_count; i++)
1904                         if (internals->slaves[i].port_id == internals->primary_port)
1905                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1906
1907                 if (new_mac_addr == NULL)
1908                         return -1;
1909
1910                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1911                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1912                                         eth_dev->data->port_id);
1913                         return -1;
1914                 }
1915         }
1916
1917         /* Update the MAC addresses of all slave devices */
1918         if (mac_address_slaves_update(eth_dev) != 0)
1919                 return -1;
1920
1921         /* If bonded device is configured in promiscuous mode then re-apply config */
1922         if (internals->promiscuous_en)
1923                 bond_ethdev_promiscuous_enable(eth_dev);
1924
1925         if (internals->mode == BONDING_MODE_8023AD) {
1926                 if (internals->mode4.dedicated_queues.enabled == 1) {
1927                         internals->mode4.dedicated_queues.rx_qid =
1928                                         eth_dev->data->nb_rx_queues;
1929                         internals->mode4.dedicated_queues.tx_qid =
1930                                         eth_dev->data->nb_tx_queues;
1931                 }
1932         }
1933
1934
1935         /* Reconfigure each slave device if starting bonded device */
1936         for (i = 0; i < internals->slave_count; i++) {
1937                 struct rte_eth_dev *slave_ethdev =
1938                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1939                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1940                         RTE_BOND_LOG(ERR,
1941                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1942                                 eth_dev->data->port_id,
1943                                 internals->slaves[i].port_id);
1944                         return -1;
1945                 }
1946                 /* We will need to poll for link status if any slave doesn't
1947                  * support interrupts
1948                  */
1949                 if (internals->slaves[i].link_status_poll_enabled)
1950                         internals->link_status_polling_enabled = 1;
1951         }
1952         /* start polling if needed */
1953         if (internals->link_status_polling_enabled) {
1954                 rte_eal_alarm_set(
1955                         internals->link_status_polling_interval_ms * 1000,
1956                         bond_ethdev_slave_link_status_change_monitor,
1957                         (void *)&rte_eth_devices[internals->port_id]);
1958         }
1959
1960         if (internals->user_defined_primary_port)
1961                 bond_ethdev_primary_set(internals, internals->primary_port);
1962
1963         if (internals->mode == BONDING_MODE_8023AD)
1964                 bond_mode_8023ad_start(eth_dev);
1965
1966         if (internals->mode == BONDING_MODE_TLB ||
1967                         internals->mode == BONDING_MODE_ALB)
1968                 bond_tlb_enable(internals);
1969
1970         return 0;
1971 }
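
/*
 * Illustrative sketch (not part of the driver): slave reconfiguration above
 * is triggered by the normal ethdev start sequence on the bond, never by
 * starting slaves directly ('bond_port', 'conf' and 'mp' hypothetical):
 *
 *     rte_eth_dev_configure(bond_port, 1, 1, &conf);
 *     rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(), NULL, mp);
 *     rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(), NULL);
 *     rte_eth_dev_start(bond_port);   // slaves are (re)configured here
 */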
1972
1973 static void
1974 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1975 {
1976         uint16_t i;
1977
1978         if (dev->data->rx_queues != NULL) {
1979                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1980                         rte_free(dev->data->rx_queues[i]);
1981                         dev->data->rx_queues[i] = NULL;
1982                 }
1983                 dev->data->nb_rx_queues = 0;
1984         }
1985
1986         if (dev->data->tx_queues != NULL) {
1987                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1988                         rte_free(dev->data->tx_queues[i]);
1989                         dev->data->tx_queues[i] = NULL;
1990                 }
1991                 dev->data->nb_tx_queues = 0;
1992         }
1993 }
1994
1995 void
1996 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1997 {
1998         struct bond_dev_private *internals = eth_dev->data->dev_private;
1999         uint8_t i;
2000
2001         if (internals->mode == BONDING_MODE_8023AD) {
2002                 struct port *port;
2003                 void *pkt = NULL;
2004
2005                 bond_mode_8023ad_stop(eth_dev);
2006
2007                 /* Discard all messages to/from mode 4 state machines */
2008                 for (i = 0; i < internals->active_slave_count; i++) {
2009                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2010
2011                         RTE_ASSERT(port->rx_ring != NULL);
2012                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2013                                 rte_pktmbuf_free(pkt);
2014
2015                         RTE_ASSERT(port->tx_ring != NULL);
2016                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2017                                 rte_pktmbuf_free(pkt);
2018                 }
2019         }
2020
2021         if (internals->mode == BONDING_MODE_TLB ||
2022                         internals->mode == BONDING_MODE_ALB) {
2023                 bond_tlb_disable(internals);
2024                 for (i = 0; i < internals->active_slave_count; i++)
2025                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2026         }
2027
2028         internals->active_slave_count = 0;
2029         internals->link_status_polling_enabled = 0;
2030         for (i = 0; i < internals->slave_count; i++)
2031                 internals->slaves[i].last_link_status = 0;
2032
2033         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2034         eth_dev->data->dev_started = 0;
2035 }
2036
2037 void
2038 bond_ethdev_close(struct rte_eth_dev *dev)
2039 {
2040         struct bond_dev_private *internals = dev->data->dev_private;
2041         uint8_t bond_port_id = internals->port_id;
2042         int skipped = 0;
2043
2044         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2045         while (internals->slave_count != skipped) {
2046                 uint16_t port_id = internals->slaves[skipped].port_id;
2047
2048                 rte_eth_dev_stop(port_id);
2049
2050                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2051                         RTE_LOG(ERR, PMD,
2052                                 "Failed to remove port %d from bonded device "
2053                                 "%s\n", port_id, dev->device->name);
2054                         skipped++;
2055                 }
2056         }
2057         bond_ethdev_free_queues(dev);
2058         rte_bitmap_reset(internals->vlan_filter_bmp);
2059 }
2060
2061 /* forward declaration */
2062 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2063
2064 static void
2065 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2066 {
2067         struct bond_dev_private *internals = dev->data->dev_private;
2068
2069         uint16_t max_nb_rx_queues = UINT16_MAX;
2070         uint16_t max_nb_tx_queues = UINT16_MAX;
2071
2072         dev_info->max_mac_addrs = 1;
2073
2074         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2075                         internals->candidate_max_rx_pktlen :
2076                         ETHER_MAX_JUMBO_FRAME_LEN;
2077
2078         /* Max number of tx/rx queues that the bonded device can support is the
2079          * minimum value across the bonded slaves, as all slaves must be capable
2080          * of supporting the same number of tx/rx queues.
2081          */
2082         if (internals->slave_count > 0) {
2083                 struct rte_eth_dev_info slave_info;
2084                 uint8_t idx;
2085
2086                 for (idx = 0; idx < internals->slave_count; idx++) {
2087                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2088                                         &slave_info);
2089
2090                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2091                                 max_nb_rx_queues = slave_info.max_rx_queues;
2092
2093                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2094                                 max_nb_tx_queues = slave_info.max_tx_queues;
2095                 }
2096         }
2097
2098         dev_info->max_rx_queues = max_nb_rx_queues;
2099         dev_info->max_tx_queues = max_nb_tx_queues;
2100
2101         /**
2102          * If dedicated hw queues enabled for link bonding device in LACP mode
2103          * then we need to reduce the maximum number of data path queues by 1.
2104          */
2105         if (internals->mode == BONDING_MODE_8023AD &&
2106                 internals->mode4.dedicated_queues.enabled == 1) {
2107                 dev_info->max_rx_queues--;
2108                 dev_info->max_tx_queues--;
2109         }
2110
2111         dev_info->min_rx_bufsize = 0;
2112
2113         dev_info->rx_offload_capa = internals->rx_offload_capa;
2114         dev_info->tx_offload_capa = internals->tx_offload_capa;
2115         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2116
2117         dev_info->reta_size = internals->reta_size;
2118 }
2119
2120 static int
2121 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2122 {
2123         int res;
2124         uint16_t i;
2125         struct bond_dev_private *internals = dev->data->dev_private;
2126
2127         /* don't do this while a slave is being added */
2128         rte_spinlock_lock(&internals->lock);
2129
2130         if (on)
2131                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2132         else
2133                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2134
2135         for (i = 0; i < internals->slave_count; i++) {
2136                 uint16_t port_id = internals->slaves[i].port_id;
2137
2138                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2139                 if (res == -ENOTSUP)
2140                         RTE_LOG(WARNING, PMD,
2141                                 "Setting VLAN filter on slave port %u not supported.\n",
2142                                 port_id);
2143         }
2144
2145         rte_spinlock_unlock(&internals->lock);
2146         return 0;
2147 }
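
/*
 * Illustrative sketch (not part of the driver): the callback above runs
 * when an application programs a VLAN filter on the bonded port, which is
 * then mirrored to every slave ('bond_port' hypothetical):
 *
 *     rte_eth_dev_vlan_filter(bond_port, 100, 1);   // accept VLAN 100
 *     rte_eth_dev_vlan_filter(bond_port, 100, 0);   // remove it again
 */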
2148
2149 static int
2150 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2151                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2152                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2153 {
2154         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2155                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2156                                         0, dev->data->numa_node);
2157         if (bd_rx_q == NULL)
2158                 return -1;
2159
2160         bd_rx_q->queue_id = rx_queue_id;
2161         bd_rx_q->dev_private = dev->data->dev_private;
2162
2163         bd_rx_q->nb_rx_desc = nb_rx_desc;
2164
2165         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2166         bd_rx_q->mb_pool = mb_pool;
2167
2168         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2169
2170         return 0;
2171 }
2172
2173 static int
2174 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2175                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2176                 const struct rte_eth_txconf *tx_conf)
2177 {
2178         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2179                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2180                                         0, dev->data->numa_node);
2181
2182         if (bd_tx_q == NULL)
2183                 return -1;
2184
2185         bd_tx_q->queue_id = tx_queue_id;
2186         bd_tx_q->dev_private = dev->data->dev_private;
2187
2188         bd_tx_q->nb_tx_desc = nb_tx_desc;
2189         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2190
2191         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2192
2193         return 0;
2194 }
2195
2196 static void
2197 bond_ethdev_rx_queue_release(void *queue)
2198 {
2199         if (queue == NULL)
2200                 return;
2201
2202         rte_free(queue);
2203 }
2204
2205 static void
2206 bond_ethdev_tx_queue_release(void *queue)
2207 {
2208         if (queue == NULL)
2209                 return;
2210
2211         rte_free(queue);
2212 }
2213
2214 static void
2215 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2216 {
2217         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2218         struct bond_dev_private *internals;
2219
2220         /* Default value for polling slave found is true as we don't want to
2221          * disable the polling thread if we cannot get the lock */
2222         int i, polling_slave_found = 1;
2223
2224         if (cb_arg == NULL)
2225                 return;
2226
2227         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2228         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2229
2230         if (!bonded_ethdev->data->dev_started ||
2231                 !internals->link_status_polling_enabled)
2232                 return;
2233
2234         /* If device is currently being configured then don't check slaves link
2235          * status, wait until next period */
2236         if (rte_spinlock_trylock(&internals->lock)) {
2237                 if (internals->slave_count > 0)
2238                         polling_slave_found = 0;
2239
2240                 for (i = 0; i < internals->slave_count; i++) {
2241                         if (!internals->slaves[i].link_status_poll_enabled)
2242                                 continue;
2243
2244                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2245                         polling_slave_found = 1;
2246
2247                         /* Update slave link status */
2248                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2249                                         internals->slaves[i].link_status_wait_to_complete);
2250
2251                         /* if link status has changed since last checked then call lsc
2252                          * event callback */
2253                         if (slave_ethdev->data->dev_link.link_status !=
2254                                         internals->slaves[i].last_link_status) {
2255                                 internals->slaves[i].last_link_status =
2256                                                 slave_ethdev->data->dev_link.link_status;
2257
2258                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2259                                                 RTE_ETH_EVENT_INTR_LSC,
2260                                                 &bonded_ethdev->data->port_id,
2261                                                 NULL);
2262                         }
2263                 }
2264                 rte_spinlock_unlock(&internals->lock);
2265         }
2266
2267         if (polling_slave_found)
2268                 /* Set alarm to continue monitoring link status of slave ethdev's */
2269                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2270                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2271 }
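
/*
 * Note: rte_eal_alarm_set() arms a one-shot callback, so the monitor above
 * must re-arm itself on every invocation to behave like a periodic timer.
 * A minimal sketch of the same pattern ('my_cb' and 'arg' hypothetical):
 *
 *     static void my_cb(void *arg) {
 *             // ... do periodic work ...
 *             rte_eal_alarm_set(10 * 1000, my_cb, arg);  // again in 10ms
 *     }
 */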
2272
2273 static int
2274 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2275 {
2276         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2277
2278         struct bond_dev_private *bond_ctx;
2279         struct rte_eth_link slave_link;
2280
2281         uint32_t idx;
2282
2283         bond_ctx = ethdev->data->dev_private;
2284
2285         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2286
2287         if (ethdev->data->dev_started == 0 ||
2288                         bond_ctx->active_slave_count == 0) {
2289                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2290                 return 0;
2291         }
2292
2293         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2294
2295         if (wait_to_complete)
2296                 link_update = rte_eth_link_get;
2297         else
2298                 link_update = rte_eth_link_get_nowait;
2299
2300         switch (bond_ctx->mode) {
2301         case BONDING_MODE_BROADCAST:
2302                 /**
2303                  * Setting link speed to UINT32_MAX to ensure we pick up the
2304                  * value of the first active slave
2305                  */
2306                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2307
2308                 /**
2309                  * link speed is the minimum of all the slaves' link speeds, as
2310                  * packet loss will occur on this slave if transmission at rates
2311                  * greater than this are attempted
2312                  */
2313                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2314                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2315
2316                         if (slave_link.link_speed <
2317                                         ethdev->data->dev_link.link_speed)
2318                                 ethdev->data->dev_link.link_speed =
2319                                                 slave_link.link_speed;
2320                 }
2321                 break;
2322         case BONDING_MODE_ACTIVE_BACKUP:
2323                 /* Current primary slave */
2324                 link_update(bond_ctx->current_primary_port, &slave_link);
2325
2326                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2327                 break;
2328         case BONDING_MODE_8023AD:
2329                 ethdev->data->dev_link.link_autoneg =
2330                                 bond_ctx->mode4.slave_link.link_autoneg;
2331                 ethdev->data->dev_link.link_duplex =
2332                                 bond_ctx->mode4.slave_link.link_duplex;
2333                 /* fall through to update link speed */
2334         case BONDING_MODE_ROUND_ROBIN:
2335         case BONDING_MODE_BALANCE:
2336         case BONDING_MODE_TLB:
2337         case BONDING_MODE_ALB:
2338         default:
2339                 /**
2340                  * In these modes the maximum theoretical link speed is the sum
2341                  * of all the slaves' link speeds
2342                  */
2343                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2344
2345                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2346                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2347
2348                         ethdev->data->dev_link.link_speed +=
2349                                         slave_link.link_speed;
2350                 }
2351         }
2352
2353
2354         return 0;
2355 }
2356
2357
2358 static int
2359 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2360 {
2361         struct bond_dev_private *internals = dev->data->dev_private;
2362         struct rte_eth_stats slave_stats;
2363         int i, j;
2364
2365         for (i = 0; i < internals->slave_count; i++) {
2366                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2367
2368                 stats->ipackets += slave_stats.ipackets;
2369                 stats->opackets += slave_stats.opackets;
2370                 stats->ibytes += slave_stats.ibytes;
2371                 stats->obytes += slave_stats.obytes;
2372                 stats->imissed += slave_stats.imissed;
2373                 stats->ierrors += slave_stats.ierrors;
2374                 stats->oerrors += slave_stats.oerrors;
2375                 stats->rx_nombuf += slave_stats.rx_nombuf;
2376
2377                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2378                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2379                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2380                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2381                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2382                         stats->q_errors[j] += slave_stats.q_errors[j];
2383                 }
2384
2385         }
2386
2387         return 0;
2388 }
2389
2390 static void
2391 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2392 {
2393         struct bond_dev_private *internals = dev->data->dev_private;
2394         int i;
2395
2396         for (i = 0; i < internals->slave_count; i++)
2397                 rte_eth_stats_reset(internals->slaves[i].port_id);
2398 }
2399
2400 static void
2401 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2402 {
2403         struct bond_dev_private *internals = eth_dev->data->dev_private;
2404         int i;
2405
2406         internals->promiscuous_en = 1;
2407
2408         switch (internals->mode) {
2409         /* Promiscuous mode is propagated to all slaves */
2410         case BONDING_MODE_ROUND_ROBIN:
2411         case BONDING_MODE_BALANCE:
2412         case BONDING_MODE_BROADCAST:
2413                 for (i = 0; i < internals->slave_count; i++)
2414                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2415                 break;
2416         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2417         case BONDING_MODE_8023AD:
2418                 break;
2419         /* Promiscuous mode is propagated only to primary slave */
2420         case BONDING_MODE_ACTIVE_BACKUP:
2421         case BONDING_MODE_TLB:
2422         case BONDING_MODE_ALB:
2423         default:
2424                 rte_eth_promiscuous_enable(internals->current_primary_port);
2425         }
2426 }
2427
2428 static void
2429 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2430 {
2431         struct bond_dev_private *internals = dev->data->dev_private;
2432         int i;
2433
2434         internals->promiscuous_en = 0;
2435
2436         switch (internals->mode) {
2437         /* Promiscuous mode is propagated to all slaves */
2438         case BONDING_MODE_ROUND_ROBIN:
2439         case BONDING_MODE_BALANCE:
2440         case BONDING_MODE_BROADCAST:
2441                 for (i = 0; i < internals->slave_count; i++)
2442                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2443                 break;
2444         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2445         case BONDING_MODE_8023AD:
2446                 break;
2447         /* Promiscuous mode is propagated only to primary slave */
2448         case BONDING_MODE_ACTIVE_BACKUP:
2449         case BONDING_MODE_TLB:
2450         case BONDING_MODE_ALB:
2451         default:
2452                 rte_eth_promiscuous_disable(internals->current_primary_port);
2453         }
2454 }
2455
2456 static void
2457 bond_ethdev_delayed_lsc_propagation(void *arg)
2458 {
2459         if (arg == NULL)
2460                 return;
2461
2462         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2463                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2464 }
2465
2466 int
2467 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2468                 void *param, void *ret_param __rte_unused)
2469 {
2470         struct rte_eth_dev *bonded_eth_dev;
2471         struct bond_dev_private *internals;
2472         struct rte_eth_link link;
2473         int rc = -1;
2474
2475         int i, valid_slave = 0;
2476         uint16_t active_pos;
2477         uint8_t lsc_flag = 0;
2478
2479         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2480                 return rc;
2481
2482         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2483
2484         if (check_for_bonded_ethdev(bonded_eth_dev))
2485                 return rc;
2486
2487         internals = bonded_eth_dev->data->dev_private;
2488
2489         /* If the device isn't started don't handle interrupts */
2490         if (!bonded_eth_dev->data->dev_started)
2491                 return rc;
2492
2493         /* verify that port_id is a valid slave of bonded port */
2494         for (i = 0; i < internals->slave_count; i++) {
2495                 if (internals->slaves[i].port_id == port_id) {
2496                         valid_slave = 1;
2497                         break;
2498                 }
2499         }
2500
2501         if (!valid_slave)
2502                 return rc;
2503
2504         /* Search for port in active port list */
2505         active_pos = find_slave_by_id(internals->active_slaves,
2506                         internals->active_slave_count, port_id);
2507
2508         rte_eth_link_get_nowait(port_id, &link);
2509         if (link.link_status) {
2510                 if (active_pos < internals->active_slave_count)
2511                         return rc;
2512
2513                 /* if no active slave ports then set this port to be primary port */
2514                 if (internals->active_slave_count < 1) {
2515                         /* If first active slave, then change link status */
2516                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2517                         internals->current_primary_port = port_id;
2518                         lsc_flag = 1;
2519
2520                         mac_address_slaves_update(bonded_eth_dev);
2521                 }
2522
2523                 activate_slave(bonded_eth_dev, port_id);
2524
2525                 /* If user has defined the primary port then default to using it */
2526                 if (internals->user_defined_primary_port &&
2527                                 internals->primary_port == port_id)
2528                         bond_ethdev_primary_set(internals, port_id);
2529         } else {
2530                 if (active_pos == internals->active_slave_count)
2531                         return rc;
2532
2533                 /* Remove from active slave list */
2534                 deactivate_slave(bonded_eth_dev, port_id);
2535
2536                 if (internals->active_slave_count < 1)
2537                         lsc_flag = 1;
2538
2539                 /* Update primary id: take the first active slave from the list or,
2540                  * if none is available, fall back to the configured primary port */
2541                 if (port_id == internals->current_primary_port) {
2542                         if (internals->active_slave_count > 0)
2543                                 bond_ethdev_primary_set(internals,
2544                                                 internals->active_slaves[0]);
2545                         else
2546                                 internals->current_primary_port = internals->primary_port;
2547                 }
2548         }
2549
2550         /**
2551          * Update bonded device link properties after any change to active
2552          * slaves
2553          */
2554         bond_ethdev_link_update(bonded_eth_dev, 0);
2555
2556         if (lsc_flag) {
2557                 /* Cancel any possible outstanding interrupts if delays are enabled */
2558                 if (internals->link_up_delay_ms > 0 ||
2559                         internals->link_down_delay_ms > 0)
2560                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2561                                         bonded_eth_dev);
2562
2563                 if (bonded_eth_dev->data->dev_link.link_status) {
2564                         if (internals->link_up_delay_ms > 0)
2565                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2566                                                 bond_ethdev_delayed_lsc_propagation,
2567                                                 (void *)bonded_eth_dev);
2568                         else
2569                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2570                                                 RTE_ETH_EVENT_INTR_LSC,
2571                                                 NULL, NULL);
2572
2573                 } else {
2574                         if (internals->link_down_delay_ms > 0)
2575                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2576                                                 bond_ethdev_delayed_lsc_propagation,
2577                                                 (void *)bonded_eth_dev);
2578                         else
2579                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2580                                                 RTE_ETH_EVENT_INTR_LSC,
2581                                                 NULL, NULL);
2582                 }
2583         }
2584         return 0;
2585 }
2586
2587 static int
2588 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2589                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2590 {
2591         unsigned i, j;
2592         int result = 0;
2593         int slave_reta_size;
2594         unsigned reta_count;
2595         struct bond_dev_private *internals = dev->data->dev_private;
2596
2597         if (reta_size != internals->reta_size)
2598                 return -EINVAL;
2599
2600          /* Copy RETA table */
2601         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2602
2603         for (i = 0; i < reta_count; i++) {
2604                 internals->reta_conf[i].mask = reta_conf[i].mask;
2605                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2606                         if ((reta_conf[i].mask >> j) & 0x01)
2607                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2608         }
2609
2610         /* Fill rest of array */
2611         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2612                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2613                                 sizeof(internals->reta_conf[0]) * reta_count);
2614
2615         /* Propagate RETA over slaves */
2616         for (i = 0; i < internals->slave_count; i++) {
2617                 slave_reta_size = internals->slaves[i].reta_size;
2618                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2619                                 &internals->reta_conf[0], slave_reta_size);
2620                 if (result < 0)
2621                         return result;
2622         }
2623
2624         return 0;
2625 }
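
/*
 * Illustrative sketch (not part of the driver): reta_conf is an array of
 * 64-entry groups, so RETA index n lives in group n / RTE_RETA_GROUP_SIZE
 * at slot n % RTE_RETA_GROUP_SIZE.  Spreading a 128-entry table over two
 * queues could look like this ('bond_port' hypothetical):
 *
 *     struct rte_eth_rss_reta_entry64 reta[2] = { {0} };
 *     for (int n = 0; n < 128; n++) {
 *             reta[n / RTE_RETA_GROUP_SIZE].mask |=
 *                             1ULL << (n % RTE_RETA_GROUP_SIZE);
 *             reta[n / RTE_RETA_GROUP_SIZE].reta[n % RTE_RETA_GROUP_SIZE] =
 *                             n % 2;
 *     }
 *     rte_eth_dev_rss_reta_update(bond_port, reta, 128);
 */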
2626
2627 static int
2628 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2629                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2630 {
2631         int i, j;
2632         struct bond_dev_private *internals = dev->data->dev_private;
2633
2634         if (reta_size != internals->reta_size)
2635                 return -EINVAL;
2636
2637          /* Copy RETA table */
2638         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2639                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2640                         if ((reta_conf[i].mask >> j) & 0x01)
2641                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2642
2643         return 0;
2644 }
2645
2646 static int
2647 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2648                 struct rte_eth_rss_conf *rss_conf)
2649 {
2650         int i, result = 0;
2651         struct bond_dev_private *internals = dev->data->dev_private;
2652         struct rte_eth_rss_conf bond_rss_conf;
2653
2654         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2655
2656         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2657
2658         if (bond_rss_conf.rss_hf != 0)
2659                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2660
2661         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2662                         sizeof(internals->rss_key)) {
2663                 if (bond_rss_conf.rss_key_len == 0)
2664                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2665                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2666                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2667                                 internals->rss_key_len);
2668         }
2669
2670         for (i = 0; i < internals->slave_count; i++) {
2671                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2672                                 &bond_rss_conf);
2673                 if (result < 0)
2674                         return result;
2675         }
2676
2677         return 0;
2678 }
2679
2680 static int
2681 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2682                 struct rte_eth_rss_conf *rss_conf)
2683 {
2684         struct bond_dev_private *internals = dev->data->dev_private;
2685
2686         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2687         rss_conf->rss_key_len = internals->rss_key_len;
2688         if (rss_conf->rss_key)
2689                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2690
2691         return 0;
2692 }
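
/*
 * Illustrative sketch (not part of the driver) pairing the two RSS hash ops
 * above: push a hash configuration to the bonded port, then read back what
 * was stored. The function name and key contents are placeholders.
 */
static int __rte_unused
example_rss_hash_roundtrip(uint16_t bonded_port_id)
{
        static uint8_t key[40]; /* placeholder 40-byte RSS key */
        struct rte_eth_rss_conf conf = {
                .rss_key = key,
                .rss_key_len = sizeof(key),
                .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
        };
        int ret;

        /* propagated to every slave by bond_ethdev_rss_hash_update() */
        ret = rte_eth_dev_rss_hash_update(bonded_port_id, &conf);
        if (ret < 0)
                return ret;

        /* serviced by bond_ethdev_rss_hash_conf_get() */
        return rte_eth_dev_rss_hash_conf_get(bonded_port_id, &conf);
}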
2693
2694 const struct eth_dev_ops default_dev_ops = {
2695         .dev_start            = bond_ethdev_start,
2696         .dev_stop             = bond_ethdev_stop,
2697         .dev_close            = bond_ethdev_close,
2698         .dev_configure        = bond_ethdev_configure,
2699         .dev_infos_get        = bond_ethdev_info,
2700         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2701         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2702         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2703         .rx_queue_release     = bond_ethdev_rx_queue_release,
2704         .tx_queue_release     = bond_ethdev_tx_queue_release,
2705         .link_update          = bond_ethdev_link_update,
2706         .stats_get            = bond_ethdev_stats_get,
2707         .stats_reset          = bond_ethdev_stats_reset,
2708         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2709         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2710         .reta_update          = bond_ethdev_rss_reta_update,
2711         .reta_query           = bond_ethdev_rss_reta_query,
2712         .rss_hash_update      = bond_ethdev_rss_hash_update,
2713         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2714 };
2715
2716 static int
2717 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2718 {
2719         const char *name = rte_vdev_device_name(dev);
2720         uint8_t socket_id = dev->device.numa_node;
2721         struct bond_dev_private *internals = NULL;
2722         struct rte_eth_dev *eth_dev = NULL;
2723         uint32_t vlan_filter_bmp_size;
2724
2725         /* now do all data allocation - for the eth_dev structure and the
2726          * internal (private) data
2727          */
2728
2729         /* reserve an ethdev entry */
2730         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2731         if (eth_dev == NULL) {
2732                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2733                 goto err;
2734         }
2735
2736         internals = eth_dev->data->dev_private;
2737         eth_dev->data->nb_rx_queues = (uint16_t)1;
2738         eth_dev->data->nb_tx_queues = (uint16_t)1;
2739
2740         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2741                         socket_id);
2742         if (eth_dev->data->mac_addrs == NULL) {
2743                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2744                 goto err;
2745         }
2746
2747         eth_dev->dev_ops = &default_dev_ops;
2748         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2749
2750         rte_spinlock_init(&internals->lock);
2751
2752         internals->port_id = eth_dev->data->port_id;
2753         internals->mode = BONDING_MODE_INVALID;
2754         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2755         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2756         internals->xmit_hash = xmit_l2_hash;
2757         internals->user_defined_mac = 0;
2758
2759         internals->link_status_polling_enabled = 0;
2760
2761         internals->link_status_polling_interval_ms =
2762                 DEFAULT_POLLING_INTERVAL_10_MS;
2763         internals->link_down_delay_ms = 0;
2764         internals->link_up_delay_ms = 0;
2765
2766         internals->slave_count = 0;
2767         internals->active_slave_count = 0;
2768         internals->rx_offload_capa = 0;
2769         internals->tx_offload_capa = 0;
2770         internals->candidate_max_rx_pktlen = 0;
2771         internals->max_rx_pktlen = 0;
2772
2773         /* Initially allow to choose any offload type */
2774         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2775
2776         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2777         memset(internals->slaves, 0, sizeof(internals->slaves));
2778
2779         /* Set mode 4 default configuration */
2780         bond_mode_8023ad_setup(eth_dev, NULL);
2781         if (bond_ethdev_mode_set(eth_dev, mode)) {
2782                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d to mode %d",
2783                                  eth_dev->data->port_id, mode);
2784                 goto err;
2785         }
2786
2787         vlan_filter_bmp_size =
2788                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2789         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2790                                                    RTE_CACHE_LINE_SIZE);
2791         if (internals->vlan_filter_bmpmem == NULL) {
2792                 RTE_BOND_LOG(ERR,
2793                              "Failed to allocate vlan bitmap for bonded device %u",
2794                              eth_dev->data->port_id);
2795                 goto err;
2796         }
2797
2798         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2799                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2800         if (internals->vlan_filter_bmp == NULL) {
2801                 RTE_BOND_LOG(ERR,
2802                              "Failed to init vlan bitmap for bonded device %u",
2803                              eth_dev->data->port_id);
2804                 rte_free(internals->vlan_filter_bmpmem);
2805                 goto err;
2806         }
2807
2808         return eth_dev->data->port_id;
2809
2810 err:
2811         rte_free(internals);
2812         if (eth_dev != NULL) {
2813                 rte_free(eth_dev->data->mac_addrs);
2814                 rte_eth_dev_release_port(eth_dev);
2815         }
2816         return -1;
2817 }
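
/*
 * Illustrative sketch (not part of the driver): bond_alloc() is normally
 * reached through the vdev bus, but the same device can be created with the
 * public API from rte_eth_bond.h. The device name and slave port ids below
 * are hypothetical.
 */
static int __rte_unused
example_create_bonded_device(void)
{
        int port_id;

        /* active-backup bonded device on NUMA socket 0 */
        port_id = rte_eth_bond_create("net_bonding0",
                        BONDING_MODE_ACTIVE_BACKUP, 0);
        if (port_id < 0)
                return port_id;

        /* attach two already-probed ethdev ports as slaves */
        if (rte_eth_bond_slave_add(port_id, 0) != 0 ||
                        rte_eth_bond_slave_add(port_id, 1) != 0)
                return -1;

        return port_id;
}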
2818
2819 static int
2820 bond_probe(struct rte_vdev_device *dev)
2821 {
2822         const char *name;
2823         struct bond_dev_private *internals;
2824         struct rte_kvargs *kvlist;
2825         uint8_t bonding_mode, socket_id;
2826         int arg_count, port_id;
2827         uint8_t agg_mode;
2828
2829         if (!dev)
2830                 return -EINVAL;
2831
2832         name = rte_vdev_device_name(dev);
2833         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2834
2835         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2836                 pmd_bond_init_valid_arguments);
2837         if (kvlist == NULL)
2838                 return -1;
2839
2840         /* Parse link bonding mode */
2841         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2842                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2843                                 &bond_ethdev_parse_slave_mode_kvarg,
2844                                 &bonding_mode) != 0) {
2845                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2846                                         name);
2847                         goto parse_error;
2848                 }
2849         } else {
2850                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2851                                 "device %s\n", name);
2852                 goto parse_error;
2853         }
2854
2855         /* Parse socket id to create bonding device on */
2856         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2857         if (arg_count == 1) {
2858                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2859                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2860                                 != 0) {
2861                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2862                                         "bonded device %s\n", name);
2863                         goto parse_error;
2864                 }
2865         } else if (arg_count > 1) {
2866                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2867                                 "bonded device %s\n", name);
2868                 goto parse_error;
2869         } else {
2870                 socket_id = rte_socket_id();
2871         }
2872
2873         dev->device.numa_node = socket_id;
2874
2875         /* Create link bonding eth device */
2876         port_id = bond_alloc(dev, bonding_mode);
2877         if (port_id < 0) {
2878                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2879                                 "socket %u.\n", name, bonding_mode, socket_id);
2880                 goto parse_error;
2881         }
2882         internals = rte_eth_devices[port_id].data->dev_private;
2883         internals->kvlist = kvlist;
2884
2886         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
2887                 if (rte_kvargs_process(kvlist,
2888                                 PMD_BOND_AGG_MODE_KVARG,
2889                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
2890                                 &agg_mode) != 0) {
2891                         RTE_LOG(ERR, EAL,
2892                                         "Failed to parse agg selection mode for bonded device %s\n",
2893                                         name);
2894                         goto parse_error;
2895                 }
2896
2897                 if (internals->mode == BONDING_MODE_8023AD)
2898                         rte_eth_bond_8023ad_agg_selection_set(port_id,
2899                                         agg_mode);
2900         } else {
2901                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
2902         }
2903
2904         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2905                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2906         return 0;
2907
2908 parse_error:
2909         rte_kvargs_free(kvlist);
2910
2911         return -1;
2912 }
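
/*
 * Illustrative hot-plug sketch (not part of the driver) of how the kvargs
 * parsed by bond_probe() above might be supplied at runtime; the same string
 * can be given on the EAL command line with --vdev. The device name and
 * slave PCI addresses are hypothetical.
 */
static int __rte_unused
example_probe_via_devargs(void)
{
        /* mode 4 = 802.3ad link aggregation; "slave=" repeats per slave */
        return rte_vdev_init("net_bonding0",
                        "mode=4,slave=0000:04:00.0,slave=0000:05:00.0,"
                        "agg_mode=stable,socket_id=0");
}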
2913
2914 static int
2915 bond_remove(struct rte_vdev_device *dev)
2916 {
2917         struct rte_eth_dev *eth_dev;
2918         struct bond_dev_private *internals;
2919         const char *name;
2920
2921         if (!dev)
2922                 return -EINVAL;
2923
2924         name = rte_vdev_device_name(dev);
2925         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2926
2927         /* now free all data allocation - for the eth_dev structure
2928          * and the internal (private) data
2929          */
2930
2931         /* find an ethdev entry */
2932         eth_dev = rte_eth_dev_allocated(name);
2933         if (eth_dev == NULL)
2934                 return -ENODEV;
2935
2936         RTE_ASSERT(eth_dev->device == &dev->device);
2937
2938         internals = eth_dev->data->dev_private;
2939         if (internals->slave_count != 0)
2940                 return -EBUSY;
2941
2942         if (eth_dev->data->dev_started == 1) {
2943                 bond_ethdev_stop(eth_dev);
2944                 bond_ethdev_close(eth_dev);
2945         }
2946
2947         eth_dev->dev_ops = NULL;
2948         eth_dev->rx_pkt_burst = NULL;
2949         eth_dev->tx_pkt_burst = NULL;
2950
2952         rte_bitmap_free(internals->vlan_filter_bmp);
2953         rte_free(internals->vlan_filter_bmpmem);
2954         rte_free(eth_dev->data->dev_private);
2955         rte_free(eth_dev->data->mac_addrs);
2956
2957         rte_eth_dev_release_port(eth_dev);
2958
2959         return 0;
2960 }
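
/*
 * Illustrative teardown sketch (not part of the driver): bond_remove() above
 * returns -EBUSY while slaves are still attached, so they must be detached
 * first. The device name and port ids are hypothetical.
 */
static int __rte_unused
example_remove_bonded_device(uint16_t bonded_port_id, uint16_t slave_port_id)
{
        if (rte_eth_bond_slave_remove(bonded_port_id, slave_port_id) != 0)
                return -1;

        /* reaches bond_remove() through the vdev bus */
        return rte_vdev_uninit("net_bonding0");
}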
2961
2962 /* this part will resolve the slave port ids after all the other pdevs and
2963  * vdevs have been allocated */
2964 static int
2965 bond_ethdev_configure(struct rte_eth_dev *dev)
2966 {
2967         const char *name = dev->device->name;
2968         struct bond_dev_private *internals = dev->data->dev_private;
2969         struct rte_kvargs *kvlist = internals->kvlist;
2970         int arg_count;
2971         uint16_t port_id = dev - rte_eth_devices;
2972         uint8_t agg_mode;
2973
2974         static const uint8_t default_rss_key[40] = {
2975                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2976                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2977                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2978                 0xBE, 0xAC, 0x01, 0xFA
2979         };
2980
2981         unsigned i, j;
2982
2983         /* If RSS is enabled, fill table and key with default values */
2984         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2985                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2986                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(default_rss_key);
2987                 /* record the key length so it is reported back correctly */
                     internals->rss_key_len = sizeof(default_rss_key);
                     memcpy(internals->rss_key, default_rss_key, internals->rss_key_len);
2988
2989                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2990                         internals->reta_conf[i].mask = ~0LL;
2991                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2992                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2993                 }
2994         }
2995
2996         /* set the max_rx_pktlen */
2997         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2998
2999         /*
3000          * if no kvlist, it means that this bonded device has been created
3001          * through the bonding api.
3002          */
3003         if (!kvlist)
3004                 return 0;
3005
3006         /* Parse MAC address for bonded device */
3007         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3008         if (arg_count == 1) {
3009                 struct ether_addr bond_mac;
3010
3011                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3012                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3013                         RTE_LOG(ERR, EAL, "Invalid MAC address for bonded device %s\n",
3014                                         name);
3015                         return -1;
3016                 }
3017
3018                 /* Set MAC address */
3019                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3020                         RTE_LOG(ERR, EAL,
3021                                         "Failed to set mac address on bonded device %s\n",
3022                                         name);
3023                         return -1;
3024                 }
3025         } else if (arg_count > 1) {
3026                 RTE_LOG(ERR, EAL,
3027                                 "MAC address can be specified only once for bonded device %s\n",
3028                                 name);
3029                 return -1;
3030         }
3031
3032         /* Parse/set balance mode transmit policy */
3033         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3034         if (arg_count == 1) {
3035                 uint8_t xmit_policy;
3036
3037                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3038                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3039                                                 0) {
3040                         RTE_LOG(ERR, EAL,
3041                                         "Invalid xmit policy specified for bonded device %s\n",
3042                                         name);
3043                         return -1;
3044                 }
3045
3046                 /* Set balance mode transmit policy */
3047                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3048                         RTE_LOG(ERR, EAL,
3049                                         "Failed to set balance xmit policy on bonded device %s\n",
3050                                         name);
3051                         return -1;
3052                 }
3053         } else if (arg_count > 1) {
3054                 RTE_LOG(ERR, EAL,
3055                                 "Transmit policy can be specified only once for bonded device"
3056                                 " %s\n", name);
3057                 return -1;
3058         }
3059
3060         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3061                 if (rte_kvargs_process(kvlist,
3062                                 PMD_BOND_AGG_MODE_KVARG,
3063                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3064                                 &agg_mode) != 0) {
3065                         RTE_LOG(ERR, EAL,
3066                                         "Failed to parse agg selection mode for bonded device %s\n",
3067                                         name);
                         return -1;
3068                 }
3069                 if (internals->mode == BONDING_MODE_8023AD)
3070                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3071                                         agg_mode);
3072         }
3073
3074         /* Parse/add slave ports to bonded device */
3075         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3076                 struct bond_ethdev_slave_ports slave_ports;
3077                 unsigned i;
3078
3079                 memset(&slave_ports, 0, sizeof(slave_ports));
3080
3081                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3082                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3083                         RTE_LOG(ERR, EAL,
3084                                         "Failed to parse slave ports for bonded device %s\n",
3085                                         name);
3086                         return -1;
3087                 }
3088
3089                 for (i = 0; i < slave_ports.slave_count; i++) {
3090                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3091                                 RTE_LOG(ERR, EAL,
3092                                                 "Failed to add port %d as slave to bonded device %s\n",
3093                                                 slave_ports.slaves[i], name);
3094                         }
3095                 }
3096
3097         } else {
3098                 RTE_LOG(ERR, EAL, "No slaves specified for bonded device %s\n", name);
3099                 return -1;
3100         }
3101
3102         /* Parse/set primary slave port id*/
3103         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3104         if (arg_count == 1) {
3105                 uint16_t primary_slave_port_id;
3106
3107                 if (rte_kvargs_process(kvlist,
3108                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3109                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3110                                 &primary_slave_port_id) < 0) {
3111                         RTE_LOG(ERR, EAL,
3112                                         "Invalid primary slave port id specified for bonded device"
3113                                         " %s\n", name);
3114                         return -1;
3115                 }
3116
3117                 /* Set primary slave port id */
3118                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3119                                 != 0) {
3120                         RTE_LOG(ERR, EAL,
3121                                         "Failed to set primary slave port %d on bonded device %s\n",
3122                                         primary_slave_port_id, name);
3123                         return -1;
3124                 }
3125         } else if (arg_count > 1) {
3126                 RTE_LOG(ERR, EAL,
3127                                 "Primary slave can be specified only once for bonded device"
3128                                 " %s\n", name);
3129                 return -1;
3130         }
3131
3132         /* Parse link status monitor polling interval */
3133         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3134         if (arg_count == 1) {
3135                 uint32_t lsc_poll_interval_ms;
3136
3137                 if (rte_kvargs_process(kvlist,
3138                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3139                                 &bond_ethdev_parse_time_ms_kvarg,
3140                                 &lsc_poll_interval_ms) < 0) {
3141                         RTE_LOG(ERR, EAL,
3142                                         "Invalid lsc polling interval value specified for bonded"
3143                                         " device %s\n", name);
3144                         return -1;
3145                 }
3146
3147                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3148                                 != 0) {
3149                         RTE_LOG(ERR, EAL,
3150                                         "Failed to set lsc monitor polling interval (%u ms) on"
3151                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3152                         return -1;
3153                 }
3154         } else if (arg_count > 1) {
3155                 RTE_LOG(ERR, EAL,
3156                                 "LSC polling interval can be specified only once for bonded"
3157                                 " device %s\n", name);
3158                 return -1;
3159         }
3160
3161         /* Parse link up interrupt propagation delay */
3162         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3163         if (arg_count == 1) {
3164                 uint32_t link_up_delay_ms;
3165
3166                 if (rte_kvargs_process(kvlist,
3167                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3168                                 &bond_ethdev_parse_time_ms_kvarg,
3169                                 &link_up_delay_ms) < 0) {
3170                         RTE_LOG(ERR, EAL,
3171                                         "Invalid link up propagation delay value specified for"
3172                                         " bonded device %s\n", name);
3173                         return -1;
3174                 }
3175
3176                 /* Set link up propagation delay */
3177                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3178                                 != 0) {
3179                         RTE_LOG(ERR, EAL,
3180                                         "Failed to set link up propagation delay (%u ms) on bonded"
3181                                         " device %s\n", link_up_delay_ms, name);
3182                         return -1;
3183                 }
3184         } else if (arg_count > 1) {
3185                 RTE_LOG(ERR, EAL,
3186                                 "Link up propagation delay can be specified only once for"
3187                                 " bonded device %s\n", name);
3188                 return -1;
3189         }
3190
3191         /* Parse link down interrupt propagation delay */
3192         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3193         if (arg_count == 1) {
3194                 uint32_t link_down_delay_ms;
3195
3196                 if (rte_kvargs_process(kvlist,
3197                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3198                                 &bond_ethdev_parse_time_ms_kvarg,
3199                                 &link_down_delay_ms) < 0) {
3200                         RTE_LOG(ERR, EAL,
3201                                         "Invalid link down propagation delay value specified for"
3202                                         " bonded device %s\n", name);
3203                         return -1;
3204                 }
3205
3206                 /* Set link down propagation delay */
3207                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3208                                 != 0) {
3209                         RTE_LOG(ERR, EAL,
3210                                         "Failed to set link down propagation delay (%u ms) on"
3211                                         " bonded device %s\n", link_down_delay_ms, name);
3212                         return -1;
3213                 }
3214         } else if (arg_count > 1) {
3215                 RTE_LOG(ERR, EAL,
3216                                 "Link down propagation delay can be specified only once for"
3217                                 " bonded device %s\n", name);
3218                 return -1;
3219         }
3220
3221         return 0;
3222 }
3223
3224 struct rte_vdev_driver pmd_bond_drv = {
3225         .probe = bond_probe,
3226         .remove = bond_remove,
3227 };
3228
3229 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3230 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3231
3232 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3233         "slave=<ifc> "
3234         "primary=<ifc> "
3235         "mode=[0-6] "
3236         "xmit_policy=[l2 | l23 | l34] "
3237         "agg_mode=[count | stable | bandwidth] "
3238         "socket_id=<int> "
3239         "mac=<mac addr> "
3240         "lsc_poll_period_ms=<int> "
3241         "up_delay=<int> "
3242         "down_delay=<int>");
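
/*
 * Illustrative EAL invocation matching the parameter string above; the
 * application name, core list and slave PCI addresses are placeholders:
 *
 *   ./app -l 0-3 -n 4 --vdev \
 *       'net_bonding0,mode=2,slave=0000:04:00.0,slave=0000:05:00.0,xmit_policy=l34,lsc_poll_period_ms=100'
 */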