/* dpdk.git @ 6abd9581cc676794c10b1e6fc19c63d253a30532: drivers/net/bonding/rte_eth_bond_pmd.c */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <stdbool.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

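/*
 * Skip over any VLAN headers that follow the Ethernet header and return
 * their combined size so callers can locate the L3 payload. One plain VLAN
 * tag or a QinQ outer+inner pair is handled; *proto is advanced to the
 * EtherType that follows the tag(s).
 */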
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

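/*
 * Round-robin receive: poll each active slave in turn, resuming from the
 * position remembered in internals->active_slave, until nb_pkts packets
 * have been gathered or every active slave has been polled once.
 */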
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to the structure with the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to the structure with the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

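/*
 * Return true for an untagged slow-protocol frame (EtherType 0x8809,
 * RTE_ETHER_TYPE_SLOW) whose subtype is a LACPDU or a marker PDU; such
 * frames belong to the bonding control path, not to the application.
 */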
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

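/*
 * Dry-run check for the mode 4 dedicated-queue optimization: validate with
 * rte_flow_validate() that the slave could steer slow-protocol frames to a
 * queue, and confirm the slave has spare RX/TX queue capacity beyond what
 * the bonded device already uses.
 */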
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

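/*
 * Probe whether hardware filtering of slow packets can be enabled: the
 * dedicated LACP queue pair is chosen as the next queue index after those
 * already configured on the bonded device, and each bound slave is then
 * checked with bond_ethdev_8023ad_flow_verify().
 */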
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bond support the needed flow rules */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

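/*
 * Install the rte_flow rule that steers slow-protocol frames on one slave
 * into the dedicated LACP RX queue; the returned flow handle is stored per
 * slave port so the rule can be referenced later.
 */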
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

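/*
 * Mode 4 RX without dedicated queues: read round-robin from the active
 * slaves, divert LACP/marker frames to the control path, and drop data
 * frames when the slave is not COLLECTING or, outside promiscuous mode,
 * when the destination MAC is neither the bond's address nor multicast.
 */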
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to the structure with the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* A packet whose type is known to be more than plain
                         * L2 cannot be a slow frame; leave it in the array */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
                                 !rte_is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this known client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* New client: insert it into the table, if there is room, and count
         * this first packet */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an RX path; read internals via the RX queue structure */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

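/*
 * Mode 0 (round-robin) TX: spread the burst packet by packet across the
 * active slaves, remembering in a static index where the previous burst
 * stopped so successive bursts continue the rotation. Packets a slave
 * fails to send are moved to the tail of bufs for the caller to retry.
 */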
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets round-robin into the per-slave buffers */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}


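/*
 * Transmit hash policies (layer 2, layer 2+3, layer 3+4). Each helper
 * fills slaves[i] with the output slave for buf[i]: the relevant header
 * fields are XOR-folded into a hash which is taken modulo slave_count,
 * so all packets of one flow are pinned to the same slave.
 */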
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct rte_udp_hdr *udp_hdr;
        struct rte_tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct rte_tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct rte_udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

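/*
 * Estimate how much of a slave's link capacity is still unused over the
 * current measurement window. link_bwg is the link speed converted to
 * bytes and scaled by the window length; the quotient and remainder are
 * kept separately so bandwidth_cmp() can order slaves exactly without
 * floating point.
 */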
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;


        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

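/*
 * Mode 5 (TLB) TX: try slaves in the order computed by the periodic
 * rebalancing callback above (most spare bandwidth first). Frames still
 * carrying the primary slave's source MAC are rewritten to the
 * transmitting slave's own MAC before being sent.
 */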
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

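/*
 * Mode 6 (ALB) TX: ARP packets are steered by the ALB client table and
 * get their source MAC rewritten to the chosen slave's address; pending
 * ARP update packets are generated while mode6.ntt is set; all other
 * traffic falls through to the TLB transmit path above.
 */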
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to
         * send through TLB. In the worst case every packet will be sent on
         * one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Return unsent packets to the tail of bufs. Index
                         * slave_bufs[i] by its own fill level, not nb_pkts,
                         * so only valid entries are read back. */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Return only the unsent tail to bufs, indexing the TLB
                 * buffer by its own fill level rather than nb_pkts. */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

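/*
 * Common balance TX helper: hash every packet to one of the supplied
 * slave ports using the configured xmit policy, send each per-slave
 * batch, and compact any unsent packets to the tail of bufs so the
 * caller can retry them.
 */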
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                 uint16_t *slave_port_ids, uint16_t slave_count)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        /*
         * Map each packet to an output slave index, selecting the slave with
         * a hash computed according to the configured xmit policy.
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting
         */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);
        return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
                                slave_count);
}

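/*
 * Mode 4 TX: unless a dedicated control queue is in use, first drain each
 * slave's LACP control-packet ring (re-queueing a PDU whose transmission
 * fails so it is not lost), then balance the data burst across the slaves
 * currently in DISTRIBUTING state.
 */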
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                bool dedicated_txq)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        uint16_t slave_tx_count;

        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        if (dedicated_txq)
                goto skip_tx_ring;

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))
                        continue;

                if (rte_ring_dequeue(port->tx_ring,
                                     (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

skip_tx_ring:
        if (unlikely(nb_bufs == 0))
                return 0;

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
                                dist_slave_count);
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}

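/*
 * Mode 3 (broadcast) TX: bump each mbuf's reference count once per extra
 * slave and transmit the whole burst on every active slave. Only the most
 * successful slave's count is reported to the caller, so the surplus
 * references held for the less successful slaves are freed here.
 */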
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint8_t tx_failed_flag = 0;
        uint16_t num_of_slaves;

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

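/*
 * In mode 4 the first slave's speed, duplex and autoneg settings are
 * recorded so that later slaves can be required to match them (see
 * link_properties_valid() below); in all other modes the bonded port
 * simply advertises autonegotiated full duplex.
 */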
1354 static void
1355 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1356 {
1357         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1358
1359         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1360                 /**
1361                  * If in mode 4 then save the link properties of the first
1362                  * slave; all subsequent slaves must match these properties
1363                  */
1364                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1365
1366                 bond_link->link_autoneg = slave_link->link_autoneg;
1367                 bond_link->link_duplex = slave_link->link_duplex;
1368                 bond_link->link_speed = slave_link->link_speed;
1369         } else {
1370                 /**
1371                  * In any other mode the link properties are set to default
1372                  * values of AUTONEG/DUPLEX
1373                  */
1374                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1375                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1376         }
1377 }
1378
1379 static int
1380 link_properties_valid(struct rte_eth_dev *ethdev,
1381                 struct rte_eth_link *slave_link)
1382 {
1383         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1384
1385         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1386                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1387
1388                 if (bond_link->link_duplex != slave_link->link_duplex ||
1389                         bond_link->link_autoneg != slave_link->link_autoneg ||
1390                         bond_link->link_speed != slave_link->link_speed)
1391                         return -1;
1392         }
1393
1394         return 0;
1395 }
1396
1397 int
1398 mac_address_get(struct rte_eth_dev *eth_dev,
1399                 struct rte_ether_addr *dst_mac_addr)
1400 {
1401         struct rte_ether_addr *mac_addr;
1402
1403         if (eth_dev == NULL) {
1404                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1405                 return -1;
1406         }
1407
1408         if (dst_mac_addr == NULL) {
1409                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1410                 return -1;
1411         }
1412
1413         mac_addr = eth_dev->data->mac_addrs;
1414
1415         rte_ether_addr_copy(mac_addr, dst_mac_addr);
1416         return 0;
1417 }
1418
1419 int
1420 mac_address_set(struct rte_eth_dev *eth_dev,
1421                 struct rte_ether_addr *new_mac_addr)
1422 {
1423         struct rte_ether_addr *mac_addr;
1424
1425         if (eth_dev == NULL) {
1426                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1427                 return -1;
1428         }
1429
1430         if (new_mac_addr == NULL) {
1431                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1432                 return -1;
1433         }
1434
1435         mac_addr = eth_dev->data->mac_addrs;
1436
1437         /* If the new MAC is different from the current MAC then update it */
1438         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1439                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1440
1441         return 0;
1442 }
1443
1444 static const struct rte_ether_addr null_mac_addr;
1445
1446 /*
1447  * Add additional MAC addresses to the slave
1448  */
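/* Index 0 holds the bonded device's primary MAC address, which is programmed
 * separately via rte_eth_dev_default_mac_addr_set(), so copying starts at
 * index 1. */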
1449 int
1450 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1451                 uint16_t slave_port_id)
1452 {
1453         int i, ret;
1454         struct rte_ether_addr *mac_addr;
1455
1456         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1457                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1458                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1459                         break;
1460
1461                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1462                 if (ret < 0) {
1463                         /* rollback */
1464                         for (i--; i > 0; i--)
1465                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1466                                         &bonded_eth_dev->data->mac_addrs[i]);
1467                         return ret;
1468                 }
1469         }
1470
1471         return 0;
1472 }
1473
1474 /*
1475  * Remove additional MAC addresses from the slave
1476  */
1477 int
1478 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1479                 uint16_t slave_port_id)
1480 {
1481         int i, rc, ret;
1482         struct rte_ether_addr *mac_addr;
1483
1484         rc = 0;
1485         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1486                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1487                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1488                         break;
1489
1490                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1491                 /* save only the first error */
1492                 if (ret < 0 && rc == 0)
1493                         rc = ret;
1494         }
1495
1496         return rc;
1497 }
1498
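/*
 * Push MAC addresses to the slaves according to the bonding mode: modes that
 * transmit on all slaves give every slave the bonded MAC, while
 * active-backup style modes give the bonded MAC only to the current primary
 * and restore every other slave's persisted MAC.
 */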
1499 int
1500 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1501 {
1502         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1503         int i;
1504
1505         /* Update slave devices MAC addresses */
1506         if (internals->slave_count < 1)
1507                 return -1;
1508
1509         switch (internals->mode) {
1510         case BONDING_MODE_ROUND_ROBIN:
1511         case BONDING_MODE_BALANCE:
1512         case BONDING_MODE_BROADCAST:
1513                 for (i = 0; i < internals->slave_count; i++) {
1514                         if (rte_eth_dev_default_mac_addr_set(
1515                                         internals->slaves[i].port_id,
1516                                         bonded_eth_dev->data->mac_addrs)) {
1517                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1518                                                 internals->slaves[i].port_id);
1519                                 return -1;
1520                         }
1521                 }
1522                 break;
1523         case BONDING_MODE_8023AD:
1524                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1525                 break;
1526         case BONDING_MODE_ACTIVE_BACKUP:
1527         case BONDING_MODE_TLB:
1528         case BONDING_MODE_ALB:
1529         default:
1530                 for (i = 0; i < internals->slave_count; i++) {
1531                         if (internals->slaves[i].port_id ==
1532                                         internals->current_primary_port) {
1533                                 if (rte_eth_dev_default_mac_addr_set(
1534                                                 internals->current_primary_port,
1535                                                 bonded_eth_dev->data->mac_addrs)) {
1536                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1537                                                         internals->current_primary_port);
1538                                         return -1;
1539                                 }
1540                         } else {
1541                                 if (rte_eth_dev_default_mac_addr_set(
1542                                                 internals->slaves[i].port_id,
1543                                                 &internals->slaves[i].persisted_mac_addr)) {
1544                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1545                                                         internals->slaves[i].port_id);
1546                                         return -1;
1547                                 }
1548                         }
1549                 }
1550         }
1551
1552         return 0;
1553 }
1554
1555 int
1556 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1557 {
1558         struct bond_dev_private *internals;
1559
1560         internals = eth_dev->data->dev_private;
1561
1562         switch (mode) {
1563         case BONDING_MODE_ROUND_ROBIN:
1564                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1565                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1566                 break;
1567         case BONDING_MODE_ACTIVE_BACKUP:
1568                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1569                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1570                 break;
1571         case BONDING_MODE_BALANCE:
1572                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1573                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1574                 break;
1575         case BONDING_MODE_BROADCAST:
1576                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1577                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1578                 break;
1579         case BONDING_MODE_8023AD:
1580                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1581                         return -1;
1582
1583                 if (internals->mode4.dedicated_queues.enabled == 0) {
1584                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1585                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1586                         RTE_BOND_LOG(WARNING,
1587                                 "Using mode 4, the application must invoke the "
1588                                 "TX and RX burst functions at least every 100ms.");
1589                 } else {
1590                         /* Use flow director's optimization */
1591                         eth_dev->rx_pkt_burst =
1592                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1593                         eth_dev->tx_pkt_burst =
1594                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1595                 }
1596                 break;
1597         case BONDING_MODE_TLB:
1598                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1599                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1600                 break;
1601         case BONDING_MODE_ALB:
1602                 if (bond_mode_alb_enable(eth_dev) != 0)
1603                         return -1;
1604
1605                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1606                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1607                 break;
1608         default:
1609                 return -1;
1610         }
1611
1612         internals->mode = mode;
1613
1614         return 0;
1615 }
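
/*
 * Note: the mode is normally selected through the public API rather than by
 * calling this helper directly. A minimal illustrative sketch (the device
 * name, ids and modes below are examples only):
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *     rte_eth_bond_slave_add(bond_port, slave_port_id);
 *     rte_eth_bond_mode_set(bond_port, BONDING_MODE_8023AD);
 */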
1616
1617
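/*
 * Create the per-slave mempool used for LACP control frames and, when
 * dedicated queues are enabled, set up the extra Rx/Tx queue pair on the
 * slave that carries them.
 */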
1618 static int
1619 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1620                 struct rte_eth_dev *slave_eth_dev)
1621 {
1622         int errval = 0;
1623         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1624         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1625
1626         if (port->slow_pool == NULL) {
1627                 char mem_name[256];
1628                 int slave_id = slave_eth_dev->data->port_id;
1629
1630                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1631                                 slave_id);
1632                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1633                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1634                         slave_eth_dev->data->numa_node);
1635
1636                 /* Any memory allocation failure in initialization is critical because
1637                  * resources can't be freed, so reinitialization is impossible. */
1638                 if (port->slow_pool == NULL) {
1639                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1640                                 slave_id, mem_name, rte_strerror(rte_errno));
1641                 }
1642         }
1643
1644         if (internals->mode4.dedicated_queues.enabled == 1) {
1645                 /* Configure slow Rx queue */
1646
1647                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1648                                 internals->mode4.dedicated_queues.rx_qid, 128,
1649                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1650                                 NULL, port->slow_pool);
1651                 if (errval != 0) {
1652                         RTE_BOND_LOG(ERR,
1653                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1654                                         slave_eth_dev->data->port_id,
1655                                         internals->mode4.dedicated_queues.rx_qid,
1656                                         errval);
1657                         return errval;
1658                 }
1659
1660                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1661                                 internals->mode4.dedicated_queues.tx_qid, 512,
1662                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1663                                 NULL);
1664                 if (errval != 0) {
1665                         RTE_BOND_LOG(ERR,
1666                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1667                                 slave_eth_dev->data->port_id,
1668                                 internals->mode4.dedicated_queues.tx_qid,
1669                                 errval);
1670                         return errval;
1671                 }
1672         }
1673         return 0;
1674 }
1675
1676 int
1677 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1678                 struct rte_eth_dev *slave_eth_dev)
1679 {
1680         struct bond_rx_queue *bd_rx_q;
1681         struct bond_tx_queue *bd_tx_q;
1682         uint16_t nb_rx_queues;
1683         uint16_t nb_tx_queues;
1684
1685         int errval;
1686         uint16_t q_id;
1687         struct rte_flow_error flow_error;
1688
1689         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1690
1691         /* Stop slave */
1692         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1693
1694         /* Enable interrupts on slave device if supported */
1695         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1696                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1697
1698         /* If RSS is enabled for bonding, try to enable it for slaves  */
1699         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1700                 if (internals->rss_key_len != 0) {
1701                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1702                                         internals->rss_key_len;
1703                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1704                                         internals->rss_key;
1705                 } else {
1706                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1707                 }
1708
1709                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1710                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1711                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1712                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1713         }
1714
1715         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1716                         DEV_RX_OFFLOAD_VLAN_FILTER)
1717                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1718                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1719         else
1720                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1721                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1722
1723         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1724         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1725
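        /* Reserve one extra Rx and one extra Tx queue on the slave for the
         * LACP control path when dedicated queues are in use. */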
1726         if (internals->mode == BONDING_MODE_8023AD) {
1727                 if (internals->mode4.dedicated_queues.enabled == 1) {
1728                         nb_rx_queues++;
1729                         nb_tx_queues++;
1730                 }
1731         }
1732
1733         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1734                                      bonded_eth_dev->data->mtu);
1735         if (errval != 0 && errval != -ENOTSUP) {
1736                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1737                                 slave_eth_dev->data->port_id, errval);
1738                 return errval;
1739         }
1740
1741         /* Configure device */
1742         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1743                         nb_rx_queues, nb_tx_queues,
1744                         &(slave_eth_dev->data->dev_conf));
1745         if (errval != 0) {
1746                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1747                                 slave_eth_dev->data->port_id, errval);
1748                 return errval;
1749         }
1750
1751         /* Setup Rx Queues */
1752         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1753                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1754
1755                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1756                                 bd_rx_q->nb_rx_desc,
1757                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1758                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1759                 if (errval != 0) {
1760                         RTE_BOND_LOG(ERR,
1761                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1762                                         slave_eth_dev->data->port_id, q_id, errval);
1763                         return errval;
1764                 }
1765         }
1766
1767         /* Setup Tx Queues */
1768         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1769                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1770
1771                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1772                                 bd_tx_q->nb_tx_desc,
1773                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1774                                 &bd_tx_q->tx_conf);
1775                 if (errval != 0) {
1776                         RTE_BOND_LOG(ERR,
1777                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1778                                 slave_eth_dev->data->port_id, q_id, errval);
1779                         return errval;
1780                 }
1781         }
1782
1783         if (internals->mode == BONDING_MODE_8023AD &&
1784                         internals->mode4.dedicated_queues.enabled == 1) {
1785                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1786                 if (errval != 0)
1787                         return errval;
1788
1789                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1790                                 slave_eth_dev->data->port_id) != 0) {
1791                         RTE_BOND_LOG(ERR,
1792                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1793                                 slave_eth_dev->data->port_id);
1794                         return -1;
1795                 }
1796
1797                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1798                         rte_flow_destroy(slave_eth_dev->data->port_id,
1799                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1800                                         &flow_error);
1801
1802                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1803                                 slave_eth_dev->data->port_id);
1804         }
1805
1806         /* Start device */
1807         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1808         if (errval != 0) {
1809                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1810                                 slave_eth_dev->data->port_id, errval);
1811                 return -1;
1812         }
1813
1814         /* If RSS is enabled for bonding, synchronize RETA */
1815         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1816                 int i;
1820
1821                 for (i = 0; i < internals->slave_count; i++) {
1822                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1823                                 errval = rte_eth_dev_rss_reta_update(
1824                                                 slave_eth_dev->data->port_id,
1825                                                 &internals->reta_conf[0],
1826                                                 internals->slaves[i].reta_size);
1827                                 if (errval != 0) {
1828                                         RTE_BOND_LOG(WARNING,
1829                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1830                                                      " RSS Configuration for bonding may be inconsistent.",
1831                                                      slave_eth_dev->data->port_id, errval);
1832                                 }
1833                                 break;
1834                         }
1835                 }
1836         }
1837
1838         /* If lsc interrupt is set, check initial slave's link status */
1839         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1840                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1841                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1842                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1843                         NULL);
1844         }
1845
1846         return 0;
1847 }
1848
1849 void
1850 slave_remove(struct bond_dev_private *internals,
1851                 struct rte_eth_dev *slave_eth_dev)
1852 {
1853         uint16_t i;
1854
1855         for (i = 0; i < internals->slave_count; i++)
1856                 if (internals->slaves[i].port_id ==
1857                                 slave_eth_dev->data->port_id)
1858                         break;
1859
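        /* Shift the remaining slaves (and their per-slave flow handles) down
         * one position to keep the arrays dense. */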
1860         if (i < (internals->slave_count - 1)) {
1861                 struct rte_flow *flow;
1862
1863                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1864                                 sizeof(internals->slaves[0]) *
1865                                 (internals->slave_count - i - 1));
1866                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1867                         memmove(&flow->flows[i], &flow->flows[i + 1],
1868                                 sizeof(flow->flows[0]) *
1869                                 (internals->slave_count - i - 1));
1870                         flow->flows[internals->slave_count - 1] = NULL;
1871                 }
1872         }
1873
1874         internals->slave_count--;
1875
1876         /* force reconfiguration of slave interfaces */
1877         _rte_eth_dev_reset(slave_eth_dev);
1878 }
1879
1880 static void
1881 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1882
1883 void
1884 slave_add(struct bond_dev_private *internals,
1885                 struct rte_eth_dev *slave_eth_dev)
1886 {
1887         struct bond_slave_details *slave_details =
1888                         &internals->slaves[internals->slave_count];
1889
1890         slave_details->port_id = slave_eth_dev->data->port_id;
1891         slave_details->last_link_status = 0;
1892
1893         /* Mark slave devices that don't support interrupts so we can
1894          * compensate when we start the bond
1895          */
1896         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1897                 slave_details->link_status_poll_enabled = 1;
1898         }
1899
1900         slave_details->link_status_wait_to_complete = 0;
1901         /* Save the slave's current MAC so it can be restored when it is removed */
1902         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1903                         sizeof(struct rte_ether_addr));
1904 }
1905
1906 void
1907 bond_ethdev_primary_set(struct bond_dev_private *internals,
1908                 uint16_t slave_port_id)
1909 {
1910         int i;
1911
1912         if (internals->active_slave_count < 1)
1913                 internals->current_primary_port = slave_port_id;
1914         else
1915                 /* Search bonded device slave ports for new proposed primary port */
1916                 for (i = 0; i < internals->active_slave_count; i++) {
1917                         if (internals->active_slaves[i] == slave_port_id)
1918                                 internals->current_primary_port = slave_port_id;
1919                 }
1920 }
1921
1922 static void
1923 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1924
1925 static int
1926 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1927 {
1928         struct bond_dev_private *internals;
1929         int i;
1930
1931         /* slave eth dev will be started by bonded device */
1932         if (check_for_bonded_ethdev(eth_dev)) {
1933                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1934                                 eth_dev->data->port_id);
1935                 return -1;
1936         }
1937
1938         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1939         eth_dev->data->dev_started = 1;
1940
1941         internals = eth_dev->data->dev_private;
1942
1943         if (internals->slave_count == 0) {
1944                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1945                 goto out_err;
1946         }
1947
1948         if (internals->user_defined_mac == 0) {
1949                 struct rte_ether_addr *new_mac_addr = NULL;
1950
1951                 for (i = 0; i < internals->slave_count; i++)
1952                         if (internals->slaves[i].port_id == internals->primary_port)
1953                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1954
1955                 if (new_mac_addr == NULL)
1956                         goto out_err;
1957
1958                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1959                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1960                                         eth_dev->data->port_id);
1961                         goto out_err;
1962                 }
1963         }
1964
1965         /* If bonded device is configured in promiscuous mode then re-apply config */
1966         if (internals->promiscuous_en)
1967                 bond_ethdev_promiscuous_enable(eth_dev);
1968
1969         if (internals->mode == BONDING_MODE_8023AD) {
1970                 if (internals->mode4.dedicated_queues.enabled == 1) {
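                        /* The dedicated control queues take the first index
                         * past the application-visible data queues. */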
1971                         internals->mode4.dedicated_queues.rx_qid =
1972                                         eth_dev->data->nb_rx_queues;
1973                         internals->mode4.dedicated_queues.tx_qid =
1974                                         eth_dev->data->nb_tx_queues;
1975                 }
1976         }
1977
1979         /* Reconfigure each slave device if starting bonded device */
1980         for (i = 0; i < internals->slave_count; i++) {
1981                 struct rte_eth_dev *slave_ethdev =
1982                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1983                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1984                         RTE_BOND_LOG(ERR,
1985                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1986                                 eth_dev->data->port_id,
1987                                 internals->slaves[i].port_id);
1988                         goto out_err;
1989                 }
1990                 /* We will need to poll for link status if any slave doesn't
1991                  * support interrupts
1992                  */
1993                 if (internals->slaves[i].link_status_poll_enabled)
1994                         internals->link_status_polling_enabled = 1;
1995         }
1996
1997         /* start polling if needed */
1998         if (internals->link_status_polling_enabled) {
1999                 rte_eal_alarm_set(
2000                         internals->link_status_polling_interval_ms * 1000,
2001                         bond_ethdev_slave_link_status_change_monitor,
2002                         (void *)&rte_eth_devices[internals->port_id]);
2003         }
2004
2005         /* Update all slave devices' MACs */
2006         if (mac_address_slaves_update(eth_dev) != 0)
2007                 goto out_err;
2008
2009         if (internals->user_defined_primary_port)
2010                 bond_ethdev_primary_set(internals, internals->primary_port);
2011
2012         if (internals->mode == BONDING_MODE_8023AD)
2013                 bond_mode_8023ad_start(eth_dev);
2014
2015         if (internals->mode == BONDING_MODE_TLB ||
2016                         internals->mode == BONDING_MODE_ALB)
2017                 bond_tlb_enable(internals);
2018
2019         return 0;
2020
2021 out_err:
2022         eth_dev->data->dev_started = 0;
2023         return -1;
2024 }
2025
2026 static void
2027 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2028 {
2029         uint16_t i;
2030
2031         if (dev->data->rx_queues != NULL) {
2032                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2033                         rte_free(dev->data->rx_queues[i]);
2034                         dev->data->rx_queues[i] = NULL;
2035                 }
2036                 dev->data->nb_rx_queues = 0;
2037         }
2038
2039         if (dev->data->tx_queues != NULL) {
2040                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2041                         rte_free(dev->data->tx_queues[i]);
2042                         dev->data->tx_queues[i] = NULL;
2043                 }
2044                 dev->data->nb_tx_queues = 0;
2045         }
2046 }
2047
2048 void
2049 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2050 {
2051         struct bond_dev_private *internals = eth_dev->data->dev_private;
2052         uint16_t i;
2053
2054         if (internals->mode == BONDING_MODE_8023AD) {
2055                 struct port *port;
2056                 void *pkt = NULL;
2057
2058                 bond_mode_8023ad_stop(eth_dev);
2059
2060                 /* Discard all messages to/from mode 4 state machines */
2061                 for (i = 0; i < internals->active_slave_count; i++) {
2062                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2063
2064                         RTE_ASSERT(port->rx_ring != NULL);
2065                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2066                                 rte_pktmbuf_free(pkt);
2067
2068                         RTE_ASSERT(port->tx_ring != NULL);
2069                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2070                                 rte_pktmbuf_free(pkt);
2071                 }
2072         }
2073
2074         if (internals->mode == BONDING_MODE_TLB ||
2075                         internals->mode == BONDING_MODE_ALB) {
2076                 bond_tlb_disable(internals);
2077                 for (i = 0; i < internals->active_slave_count; i++)
2078                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2079         }
2080
2081         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2082         eth_dev->data->dev_started = 0;
2083
2084         internals->link_status_polling_enabled = 0;
2085         for (i = 0; i < internals->slave_count; i++) {
2086                 uint16_t slave_id = internals->slaves[i].port_id;
2087                 if (find_slave_by_id(internals->active_slaves,
2088                                 internals->active_slave_count, slave_id) !=
2089                                                 internals->active_slave_count) {
2090                         internals->slaves[i].last_link_status = 0;
2091                         rte_eth_dev_stop(slave_id);
2092                         deactivate_slave(eth_dev, slave_id);
2093                 }
2094         }
2095 }
2096
2097 void
2098 bond_ethdev_close(struct rte_eth_dev *dev)
2099 {
2100         struct bond_dev_private *internals = dev->data->dev_private;
2101         uint16_t bond_port_id = internals->port_id;
2102         int skipped = 0;
2103         struct rte_flow_error ferror;
2104
2105         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
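        /* Remove slaves one by one; a slave that cannot be removed is left in
         * place and counted as skipped so that the loop still terminates. */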
2106         while (internals->slave_count != skipped) {
2107                 uint16_t port_id = internals->slaves[skipped].port_id;
2108
2109                 rte_eth_dev_stop(port_id);
2110
2111                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2112                         RTE_BOND_LOG(ERR,
2113                                      "Failed to remove port %d from bonded device %s",
2114                                      port_id, dev->device->name);
2115                         skipped++;
2116                 }
2117         }
2118         bond_flow_ops.flush(dev, &ferror);
2119         bond_ethdev_free_queues(dev);
2120         rte_bitmap_reset(internals->vlan_filter_bmp);
2121 }
2122
2123 /* forward declaration */
2124 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2125
2126 static void
2127 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2128 {
2129         struct bond_dev_private *internals = dev->data->dev_private;
2130
2131         uint16_t max_nb_rx_queues = UINT16_MAX;
2132         uint16_t max_nb_tx_queues = UINT16_MAX;
2133         uint16_t max_rx_desc_lim = UINT16_MAX;
2134         uint16_t max_tx_desc_lim = UINT16_MAX;
2135
2136         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2137
2138         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2139                         internals->candidate_max_rx_pktlen :
2140                         RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2141
2142         /* The max number of tx/rx queues that the bonded device can support is
2143          * the minimum across the bonded slaves, as all slaves must be capable
2144          * of supporting the same number of tx/rx queues.
2145          */
2146         if (internals->slave_count > 0) {
2147                 struct rte_eth_dev_info slave_info;
2148                 uint16_t idx;
2149
2150                 for (idx = 0; idx < internals->slave_count; idx++) {
2151                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2152                                         &slave_info);
2153
2154                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2155                                 max_nb_rx_queues = slave_info.max_rx_queues;
2156
2157                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2158                                 max_nb_tx_queues = slave_info.max_tx_queues;
2159
2160                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2161                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2162
2163                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2164                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2165                 }
2166         }
2167
2168         dev_info->max_rx_queues = max_nb_rx_queues;
2169         dev_info->max_tx_queues = max_nb_tx_queues;
2170
2171         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2172                sizeof(dev_info->default_rxconf));
2173         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2174                sizeof(dev_info->default_txconf));
2175
2176         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2177         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2178
2179         /**
2180          * If dedicated HW queues are enabled for the bonding device in LACP
2181          * mode then the maximum number of data path queues must be reduced by 1.
2182          */
2183         if (internals->mode == BONDING_MODE_8023AD &&
2184                 internals->mode4.dedicated_queues.enabled == 1) {
2185                 dev_info->max_rx_queues--;
2186                 dev_info->max_tx_queues--;
2187         }
2188
2189         dev_info->min_rx_bufsize = 0;
2190
2191         dev_info->rx_offload_capa = internals->rx_offload_capa;
2192         dev_info->tx_offload_capa = internals->tx_offload_capa;
2193         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2194         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2195         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2196
2197         dev_info->reta_size = internals->reta_size;
2198 }
2199
2200 static int
2201 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2202 {
2203         int res;
2204         uint16_t i;
2205         struct bond_dev_private *internals = dev->data->dev_private;
2206
2207         /* don't do this while a slave is being added */
2208         rte_spinlock_lock(&internals->lock);
2209
2210         if (on)
2211                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2212         else
2213                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2214
2215         for (i = 0; i < internals->slave_count; i++) {
2216                 uint16_t port_id = internals->slaves[i].port_id;
2217
2218                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2219                 if (res == -ENOTSUP)
2220                         RTE_BOND_LOG(WARNING,
2221                                      "Setting VLAN filter on slave port %u not supported.",
2222                                      port_id);
2223         }
2224
2225         rte_spinlock_unlock(&internals->lock);
2226         return 0;
2227 }
2228
2229 static int
2230 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2231                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2232                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2233 {
2234         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2235                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2236                                         0, dev->data->numa_node);
2237         if (bd_rx_q == NULL)
2238                 return -1;
2239
2240         bd_rx_q->queue_id = rx_queue_id;
2241         bd_rx_q->dev_private = dev->data->dev_private;
2242
2243         bd_rx_q->nb_rx_desc = nb_rx_desc;
2244
2245         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2246         bd_rx_q->mb_pool = mb_pool;
2247
2248         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2249
2250         return 0;
2251 }
2252
2253 static int
2254 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2255                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2256                 const struct rte_eth_txconf *tx_conf)
2257 {
2258         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2259                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2260                                         0, dev->data->numa_node);
2261
2262         if (bd_tx_q == NULL)
2263                 return -1;
2264
2265         bd_tx_q->queue_id = tx_queue_id;
2266         bd_tx_q->dev_private = dev->data->dev_private;
2267
2268         bd_tx_q->nb_tx_desc = nb_tx_desc;
2269         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2270
2271         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2272
2273         return 0;
2274 }
2275
2276 static void
2277 bond_ethdev_rx_queue_release(void *queue)
2278 {
2279         if (queue == NULL)
2280                 return;
2281
2282         rte_free(queue);
2283 }
2284
2285 static void
2286 bond_ethdev_tx_queue_release(void *queue)
2287 {
2288         if (queue == NULL)
2289                 return;
2290
2291         rte_free(queue);
2292 }
2293
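/*
 * Alarm callback that periodically polls the link status of slaves whose
 * PMDs do not support link status change interrupts and emulates the LSC
 * event for them.
 */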
2294 static void
2295 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2296 {
2297         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2298         struct bond_dev_private *internals;
2299
2300         /* polling_slave_found defaults to true: we don't want to disable the
2301          * polling alarm if we cannot take the lock */
2302         int i, polling_slave_found = 1;
2303
2304         if (cb_arg == NULL)
2305                 return;
2306
2307         bonded_ethdev = cb_arg;
2308         internals = bonded_ethdev->data->dev_private;
2309
2310         if (!bonded_ethdev->data->dev_started ||
2311                 !internals->link_status_polling_enabled)
2312                 return;
2313
2314         /* If the device is currently being configured then don't check the
2315          * slaves' link status; wait until the next period */
2316         if (rte_spinlock_trylock(&internals->lock)) {
2317                 if (internals->slave_count > 0)
2318                         polling_slave_found = 0;
2319
2320                 for (i = 0; i < internals->slave_count; i++) {
2321                         if (!internals->slaves[i].link_status_poll_enabled)
2322                                 continue;
2323
2324                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2325                         polling_slave_found = 1;
2326
2327                         /* Update slave link status */
2328                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2329                                         internals->slaves[i].link_status_wait_to_complete);
2330
2331                         /* if link status has changed since last checked then call lsc
2332                          * event callback */
2333                         if (slave_ethdev->data->dev_link.link_status !=
2334                                         internals->slaves[i].last_link_status) {
2335                                 internals->slaves[i].last_link_status =
2336                                                 slave_ethdev->data->dev_link.link_status;
2337
2338                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2339                                                 RTE_ETH_EVENT_INTR_LSC,
2340                                                 &bonded_ethdev->data->port_id,
2341                                                 NULL);
2342                         }
2343                 }
2344                 rte_spinlock_unlock(&internals->lock);
2345         }
2346
2347         if (polling_slave_found)
2348                 /* Set alarm to continue monitoring link status of slave ethdev's */
2349                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2350                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2351 }
2352
2353 static int
2354 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2355 {
2356         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2357
2358         struct bond_dev_private *bond_ctx;
2359         struct rte_eth_link slave_link;
2360
2361         uint32_t idx;
2362
2363         bond_ctx = ethdev->data->dev_private;
2364
2365         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2366
2367         if (ethdev->data->dev_started == 0 ||
2368                         bond_ctx->active_slave_count == 0) {
2369                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2370                 return 0;
2371         }
2372
2373         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2374
2375         if (wait_to_complete)
2376                 link_update = rte_eth_link_get;
2377         else
2378                 link_update = rte_eth_link_get_nowait;
2379
2380         switch (bond_ctx->mode) {
2381         case BONDING_MODE_BROADCAST:
2382                 /**
2383                  * Setting link speed to UINT32_MAX to ensure we pick up the
2384                  * value of the first active slave
2385                  */
2386                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2387
2388                 /**
2389                  * The link speed is the minimum of all the slaves' link speeds,
2390                  * as packet loss will occur on a slave if transmission at a rate
2391                  * greater than its own is attempted
2392                  */
2393                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2394                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2395
2396                         if (slave_link.link_speed <
2397                                         ethdev->data->dev_link.link_speed)
2398                                 ethdev->data->dev_link.link_speed =
2399                                                 slave_link.link_speed;
2400                 }
2401                 break;
2402         case BONDING_MODE_ACTIVE_BACKUP:
2403                 /* Current primary slave */
2404                 link_update(bond_ctx->current_primary_port, &slave_link);
2405
2406                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2407                 break;
2408         case BONDING_MODE_8023AD:
2409                 ethdev->data->dev_link.link_autoneg =
2410                                 bond_ctx->mode4.slave_link.link_autoneg;
2411                 ethdev->data->dev_link.link_duplex =
2412                                 bond_ctx->mode4.slave_link.link_duplex;
2413                 /* fall through to update link speed */
2414         case BONDING_MODE_ROUND_ROBIN:
2415         case BONDING_MODE_BALANCE:
2416         case BONDING_MODE_TLB:
2417         case BONDING_MODE_ALB:
2418         default:
2419                 /**
2420                  * In these modes the maximum theoretical link speed is the sum
2421                  * of all the slaves' link speeds
2422                  */
2423                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2424
2425                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2426                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2427
2428                         ethdev->data->dev_link.link_speed +=
2429                                         slave_link.link_speed;
2430                 }
2431         }
2432
2433
2434         return 0;
2435 }
2436
2437
2438 static int
2439 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2440 {
2441         struct bond_dev_private *internals = dev->data->dev_private;
2442         struct rte_eth_stats slave_stats;
2443         int i, j;
2444
2445         for (i = 0; i < internals->slave_count; i++) {
2446                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2447
2448                 stats->ipackets += slave_stats.ipackets;
2449                 stats->opackets += slave_stats.opackets;
2450                 stats->ibytes += slave_stats.ibytes;
2451                 stats->obytes += slave_stats.obytes;
2452                 stats->imissed += slave_stats.imissed;
2453                 stats->ierrors += slave_stats.ierrors;
2454                 stats->oerrors += slave_stats.oerrors;
2455                 stats->rx_nombuf += slave_stats.rx_nombuf;
2456
2457                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2458                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2459                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2460                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2461                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2462                         stats->q_errors[j] += slave_stats.q_errors[j];
2463                 }
2464
2465         }
2466
2467         return 0;
2468 }
2469
2470 static void
2471 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2472 {
2473         struct bond_dev_private *internals = dev->data->dev_private;
2474         int i;
2475
2476         for (i = 0; i < internals->slave_count; i++)
2477                 rte_eth_stats_reset(internals->slaves[i].port_id);
2478 }
2479
2480 static void
2481 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2482 {
2483         struct bond_dev_private *internals = eth_dev->data->dev_private;
2484         int i;
2485
2486         internals->promiscuous_en = 1;
2487
2488         switch (internals->mode) {
2489         /* Promiscuous mode is propagated to all slaves */
2490         case BONDING_MODE_ROUND_ROBIN:
2491         case BONDING_MODE_BALANCE:
2492         case BONDING_MODE_BROADCAST:
2493                 for (i = 0; i < internals->slave_count; i++)
2494                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2495                 break;
2496         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2497         case BONDING_MODE_8023AD:
2498                 break;
2499         /* Promiscuous mode is propagated only to primary slave */
2500         case BONDING_MODE_ACTIVE_BACKUP:
2501         case BONDING_MODE_TLB:
2502         case BONDING_MODE_ALB:
2503         default:
2504                 /* Do not touch promisc when there cannot be primary ports */
2505                 if (internals->slave_count == 0)
2506                         break;
2507                 rte_eth_promiscuous_enable(internals->current_primary_port);
2508         }
2509 }
2510
2511 static void
2512 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2513 {
2514         struct bond_dev_private *internals = dev->data->dev_private;
2515         int i;
2516
2517         internals->promiscuous_en = 0;
2518
2519         switch (internals->mode) {
2520         /* Promiscuous mode is propagated to all slaves */
2521         case BONDING_MODE_ROUND_ROBIN:
2522         case BONDING_MODE_BALANCE:
2523         case BONDING_MODE_BROADCAST:
2524                 for (i = 0; i < internals->slave_count; i++)
2525                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2526                 break;
2527         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2528         case BONDING_MODE_8023AD:
2529                 break;
2530         /* Promiscuous mode is propagated only to primary slave */
2531         case BONDING_MODE_ACTIVE_BACKUP:
2532         case BONDING_MODE_TLB:
2533         case BONDING_MODE_ALB:
2534         default:
2535                 /* Do not touch promisc when there cannot be primary ports */
2536                 if (internals->slave_count == 0)
2537                         break;
2538                 rte_eth_promiscuous_disable(internals->current_primary_port);
2539         }
2540 }
2541
2542 static void
2543 bond_ethdev_delayed_lsc_propagation(void *arg)
2544 {
2545         if (arg == NULL)
2546                 return;
2547
2548         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2549                         RTE_ETH_EVENT_INTR_LSC, NULL);
2550 }
2551
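/*
 * Link status change handler registered for every slave port: it maintains
 * the active slave list, elects a new primary when required and propagates
 * the (optionally delayed) LSC event to the application.
 */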
2552 int
2553 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2554                 void *param, void *ret_param __rte_unused)
2555 {
2556         struct rte_eth_dev *bonded_eth_dev;
2557         struct bond_dev_private *internals;
2558         struct rte_eth_link link;
2559         int rc = -1;
2560
2561         uint8_t lsc_flag = 0;
2562         int valid_slave = 0;
2563         uint16_t active_pos;
2564         uint16_t i;
2565
2566         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2567                 return rc;
2568
2569         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2570
2571         if (check_for_bonded_ethdev(bonded_eth_dev))
2572                 return rc;
2573
2574         internals = bonded_eth_dev->data->dev_private;
2575
2576         /* If the device isn't started don't handle interrupts */
2577         if (!bonded_eth_dev->data->dev_started)
2578                 return rc;
2579
2580         /* verify that port_id is a valid slave of bonded port */
2581         for (i = 0; i < internals->slave_count; i++) {
2582                 if (internals->slaves[i].port_id == port_id) {
2583                         valid_slave = 1;
2584                         break;
2585                 }
2586         }
2587
2588         if (!valid_slave)
2589                 return rc;
2590
2591         /* Synchronize parallel invocations of the lsc callback, whether from a
2592          * real link event raised by a slave PMD or from the bonding PMD itself.
2593          */
2594         rte_spinlock_lock(&internals->lsc_lock);
2595
2596         /* Search for port in active port list */
2597         active_pos = find_slave_by_id(internals->active_slaves,
2598                         internals->active_slave_count, port_id);
2599
2600         rte_eth_link_get_nowait(port_id, &link);
2601         if (link.link_status) {
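                /* If the slave is already in the active list there is nothing
                 * to change; just refresh the bonded link properties below. */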
2602                 if (active_pos < internals->active_slave_count)
2603                         goto link_update;
2604
2605                 /* check link state properties if bonded link is up */
2606                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2607                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2608                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2609                                              "for slave %d in bonding mode %d",
2610                                              port_id, internals->mode);
2611                 } else {
2612                         /* inherit slave link properties */
2613                         link_properties_set(bonded_eth_dev, &link);
2614                 }
2615
2616                 /* If no active slave ports then set this port to be
2617                  * the primary port.
2618                  */
2619                 if (internals->active_slave_count < 1) {
2620                         /* If first active slave, then change link status */
2621                         bonded_eth_dev->data->dev_link.link_status =
2622                                                                 ETH_LINK_UP;
2623                         internals->current_primary_port = port_id;
2624                         lsc_flag = 1;
2625
2626                         mac_address_slaves_update(bonded_eth_dev);
2627                 }
2628
2629                 activate_slave(bonded_eth_dev, port_id);
2630
2631                 /* If the user has defined the primary port then default to
2632                  * using it.
2633                  */
2634                 if (internals->user_defined_primary_port &&
2635                                 internals->primary_port == port_id)
2636                         bond_ethdev_primary_set(internals, port_id);
2637         } else {
2638                 if (active_pos == internals->active_slave_count)
2639                         goto link_update;
2640
2641                 /* Remove from active slave list */
2642                 deactivate_slave(bonded_eth_dev, port_id);
2643
2644                 if (internals->active_slave_count < 1)
2645                         lsc_flag = 1;
2646
2647                 /* Update the primary id: take the first active slave from the list,
2648                  * or fall back to the configured primary port if none are active */
2649                 if (port_id == internals->current_primary_port) {
2650                         if (internals->active_slave_count > 0)
2651                                 bond_ethdev_primary_set(internals,
2652                                                 internals->active_slaves[0]);
2653                         else
2654                                 internals->current_primary_port = internals->primary_port;
2655                 }
2656         }
2657
2658 link_update:
2659         /**
2660          * Update bonded device link properties after any change to active
2661          * slaves
2662          */
2663         bond_ethdev_link_update(bonded_eth_dev, 0);
2664
2665         if (lsc_flag) {
2666                 /* Cancel any possible outstanding interrupts if delays are enabled */
2667                 if (internals->link_up_delay_ms > 0 ||
2668                         internals->link_down_delay_ms > 0)
2669                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2670                                         bonded_eth_dev);
2671
2672                 if (bonded_eth_dev->data->dev_link.link_status) {
2673                         if (internals->link_up_delay_ms > 0)
2674                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2675                                                 bond_ethdev_delayed_lsc_propagation,
2676                                                 (void *)bonded_eth_dev);
2677                         else
2678                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2679                                                 RTE_ETH_EVENT_INTR_LSC,
2680                                                 NULL);
2681
2682                 } else {
2683                         if (internals->link_down_delay_ms > 0)
2684                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2685                                                 bond_ethdev_delayed_lsc_propagation,
2686                                                 (void *)bonded_eth_dev);
2687                         else
2688                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2689                                                 RTE_ETH_EVENT_INTR_LSC,
2690                                                 NULL);
2691                 }
2692         }
2693
2694         rte_spinlock_unlock(&internals->lsc_lock);
2695
2696         return rc;
2697 }
2698
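/*
 * Update the RETA of the bonded device and propagate the resulting table
 * to every slave. The requested size must match the bonded device's
 * RETA size exactly.
 */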
2699 static int
2700 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2701                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2702 {
2703         unsigned i, j;
2704         int result = 0;
2705         int slave_reta_size;
2706         unsigned reta_count;
2707         struct bond_dev_private *internals = dev->data->dev_private;
2708
2709         if (reta_size != internals->reta_size)
2710                 return -EINVAL;
2711
2712         /* Copy the RETA table */
2713         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2714
2715         for (i = 0; i < reta_count; i++) {
2716                 internals->reta_conf[i].mask = reta_conf[i].mask;
2717                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2718                         if ((reta_conf[i].mask >> j) & 0x01)
2719                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2720         }
2721
2722         /* Fill rest of array */
2723         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2724                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2725                                 sizeof(internals->reta_conf[0]) * reta_count);
2726
2727         /* Propagate RETA over slaves */
2728         for (i = 0; i < internals->slave_count; i++) {
2729                 slave_reta_size = internals->slaves[i].reta_size;
2730                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2731                                 &internals->reta_conf[0], slave_reta_size);
2732                 if (result < 0)
2733                         return result;
2734         }
2735
2736         return 0;
2737 }
2738
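/* Return the cached RETA entries selected by the caller's mask. */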
2739 static int
2740 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2741                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2742 {
2743         int i, j;
2744         struct bond_dev_private *internals = dev->data->dev_private;
2745
2746         if (reta_size != internals->reta_size)
2747                 return -EINVAL;
2748
2749         /* Copy the RETA table */
2750         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2751                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2752                         if ((reta_conf[i].mask >> j) & 0x01)
2753                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2754
2755         return 0;
2756 }
2757
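/*
 * Update the RSS hash configuration: mask the requested hash functions
 * against those supported by all slaves, cache the key if one is given,
 * then propagate the resulting configuration to every slave.
 */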
2758 static int
2759 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2760                 struct rte_eth_rss_conf *rss_conf)
2761 {
2762         int i, result = 0;
2763         struct bond_dev_private *internals = dev->data->dev_private;
2764         struct rte_eth_rss_conf bond_rss_conf;
2765
2766         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2767
2768         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2769
2770         if (bond_rss_conf.rss_hf != 0)
2771                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2772
2773         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2774                         sizeof(internals->rss_key)) {
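                /* Treat a zero key length as the standard 40-byte RSS key. */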
2775                 if (bond_rss_conf.rss_key_len == 0)
2776                         bond_rss_conf.rss_key_len = 40;
2777                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2778                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2779                                 internals->rss_key_len);
2780         }
2781
2782         for (i = 0; i < internals->slave_count; i++) {
2783                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2784                                 &bond_rss_conf);
2785                 if (result < 0)
2786                         return result;
2787         }
2788
2789         return 0;
2790 }
2791
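/* Report the RSS hash functions and key cached for the bonded device. */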
2792 static int
2793 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2794                 struct rte_eth_rss_conf *rss_conf)
2795 {
2796         struct bond_dev_private *internals = dev->data->dev_private;
2797
2798         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2799         rss_conf->rss_key_len = internals->rss_key_len;
2800         if (rss_conf->rss_key)
2801                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2802
2803         return 0;
2804 }
2805
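/*
 * Set the MTU on every slave. All slaves are first checked for mtu_set
 * support so that the bonded device is not left partially configured.
 */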
2806 static int
2807 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2808 {
2809         struct rte_eth_dev *slave_eth_dev;
2810         struct bond_dev_private *internals = dev->data->dev_private;
2811         int ret, i;
2812
2813         rte_spinlock_lock(&internals->lock);
2814
2815         for (i = 0; i < internals->slave_count; i++) {
2816                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2817                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2818                         rte_spinlock_unlock(&internals->lock);
2819                         return -ENOTSUP;
2820                 }
2821         }
2822         for (i = 0; i < internals->slave_count; i++) {
2823                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2824                 if (ret < 0) {
2825                         rte_spinlock_unlock(&internals->lock);
2826                         return ret;
2827                 }
2828         }
2829
2830         rte_spinlock_unlock(&internals->lock);
2831         return 0;
2832 }
2833
2834 static int
2835 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2836                         struct rte_ether_addr *addr)
2837 {
2838         if (mac_address_set(dev, addr)) {
2839                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2840                 return -EINVAL;
2841         }
2842
2843         return 0;
2844 }
2845
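/* Only the generic flow (rte_flow) ops query is supported; every other
 * filter type is rejected with -ENOTSUP.
 */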
2846 static int
2847 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2848                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2849 {
2850         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2851                 *(const void **)arg = &bond_flow_ops;
2852                 return 0;
2853         }
2854         return -ENOTSUP;
2855 }
2856
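/*
 * Add a MAC address to every slave. If any slave fails, the address is
 * removed again from the slaves already updated (rollback).
 */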
2857 static int
2858 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2859                         struct rte_ether_addr *mac_addr,
2860                         __rte_unused uint32_t index, uint32_t vmdq)
2861 {
2862         struct rte_eth_dev *slave_eth_dev;
2863         struct bond_dev_private *internals = dev->data->dev_private;
2864         int ret, i;
2865
2866         rte_spinlock_lock(&internals->lock);
2867
2868         for (i = 0; i < internals->slave_count; i++) {
2869                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2870                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2871                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2872                         ret = -ENOTSUP;
2873                         goto end;
2874                 }
2875         }
2876
2877         for (i = 0; i < internals->slave_count; i++) {
2878                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2879                                 mac_addr, vmdq);
2880                 if (ret < 0) {
2881                         /* rollback */
2882                         for (i--; i >= 0; i--)
2883                                 rte_eth_dev_mac_addr_remove(
2884                                         internals->slaves[i].port_id, mac_addr);
2885                         goto end;
2886                 }
2887         }
2888
2889         ret = 0;
2890 end:
2891         rte_spinlock_unlock(&internals->lock);
2892         return ret;
2893 }
2894
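/* Remove the MAC address at the given index from every slave. */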
2895 static void
2896 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2897 {
2898         struct rte_eth_dev *slave_eth_dev;
2899         struct bond_dev_private *internals = dev->data->dev_private;
2900         int i;
2901
2902         rte_spinlock_lock(&internals->lock);
2903
2904         for (i = 0; i < internals->slave_count; i++) {
2905                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2906                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2907                         goto end;
2908         }
2909
2910         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2911
2912         for (i = 0; i < internals->slave_count; i++)
2913                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2914                                 mac_addr);
2915
2916 end:
2917         rte_spinlock_unlock(&internals->lock);
2918 }
2919
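/* Device operations exported by the bonded ethdev. */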
2920 const struct eth_dev_ops default_dev_ops = {
2921         .dev_start            = bond_ethdev_start,
2922         .dev_stop             = bond_ethdev_stop,
2923         .dev_close            = bond_ethdev_close,
2924         .dev_configure        = bond_ethdev_configure,
2925         .dev_infos_get        = bond_ethdev_info,
2926         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2927         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2928         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2929         .rx_queue_release     = bond_ethdev_rx_queue_release,
2930         .tx_queue_release     = bond_ethdev_tx_queue_release,
2931         .link_update          = bond_ethdev_link_update,
2932         .stats_get            = bond_ethdev_stats_get,
2933         .stats_reset          = bond_ethdev_stats_reset,
2934         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2935         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2936         .reta_update          = bond_ethdev_rss_reta_update,
2937         .reta_query           = bond_ethdev_rss_reta_query,
2938         .rss_hash_update      = bond_ethdev_rss_hash_update,
2939         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2940         .mtu_set              = bond_ethdev_mtu_set,
2941         .mac_addr_set         = bond_ethdev_mac_address_set,
2942         .mac_addr_add         = bond_ethdev_mac_addr_add,
2943         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
2944         .filter_ctrl          = bond_filter_ctrl
2945 };
2946
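/*
 * Allocate and initialise a bonded ethdev: reserve the ethdev entry,
 * allocate MAC address storage, fill the private data with defaults and
 * set the requested bonding mode. Returns the port id on success, -1 on
 * failure.
 */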
2947 static int
2948 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2949 {
2950         const char *name = rte_vdev_device_name(dev);
2951         uint8_t socket_id = dev->device.numa_node;
2952         struct bond_dev_private *internals = NULL;
2953         struct rte_eth_dev *eth_dev = NULL;
2954         uint32_t vlan_filter_bmp_size;
2955
2956         /* Now do all data allocation - for the eth_dev structure and the
2957          * internal (private) data.
2958          */
2959
2960         /* reserve an ethdev entry */
2961         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2962         if (eth_dev == NULL) {
2963                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2964                 goto err;
2965         }
2966
2967         internals = eth_dev->data->dev_private;
2968         eth_dev->data->nb_rx_queues = (uint16_t)1;
2969         eth_dev->data->nb_tx_queues = (uint16_t)1;
2970
2971         /* Allocate memory for storing MAC addresses */
2972         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
2973                         BOND_MAX_MAC_ADDRS, 0, socket_id);
2974         if (eth_dev->data->mac_addrs == NULL) {
2975                 RTE_BOND_LOG(ERR,
2976                              "Failed to allocate %u bytes needed to store MAC addresses",
2977                              RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
2978                 goto err;
2979         }
2980
2981         eth_dev->dev_ops = &default_dev_ops;
2982         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2983
2984         rte_spinlock_init(&internals->lock);
2985         rte_spinlock_init(&internals->lsc_lock);
2986
2987         internals->port_id = eth_dev->data->port_id;
2988         internals->mode = BONDING_MODE_INVALID;
2989         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2990         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2991         internals->burst_xmit_hash = burst_xmit_l2_hash;
2992         internals->user_defined_mac = 0;
2993
2994         internals->link_status_polling_enabled = 0;
2995
2996         internals->link_status_polling_interval_ms =
2997                 DEFAULT_POLLING_INTERVAL_10_MS;
2998         internals->link_down_delay_ms = 0;
2999         internals->link_up_delay_ms = 0;
3000
3001         internals->slave_count = 0;
3002         internals->active_slave_count = 0;
3003         internals->rx_offload_capa = 0;
3004         internals->tx_offload_capa = 0;
3005         internals->rx_queue_offload_capa = 0;
3006         internals->tx_queue_offload_capa = 0;
3007         internals->candidate_max_rx_pktlen = 0;
3008         internals->max_rx_pktlen = 0;
3009
3010         /* Initially allow any RSS offload type to be chosen */
3011         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3012
3013         memset(&internals->default_rxconf, 0,
3014                sizeof(internals->default_rxconf));
3015         memset(&internals->default_txconf, 0,
3016                sizeof(internals->default_txconf));
3017
3018         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3019         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3020
3021         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3022         memset(internals->slaves, 0, sizeof(internals->slaves));
3023
3024         TAILQ_INIT(&internals->flow_list);
3025         internals->flow_isolated_valid = 0;
3026
3027         /* Set mode 4 default configuration */
3028         bond_mode_8023ad_setup(eth_dev, NULL);
3029         if (bond_ethdev_mode_set(eth_dev, mode)) {
3030                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3031                                  eth_dev->data->port_id, mode);
3032                 goto err;
3033         }
3034
3035         vlan_filter_bmp_size =
3036                 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3037         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3038                                                    RTE_CACHE_LINE_SIZE);
3039         if (internals->vlan_filter_bmpmem == NULL) {
3040                 RTE_BOND_LOG(ERR,
3041                              "Failed to allocate vlan bitmap for bonded device %u",
3042                              eth_dev->data->port_id);
3043                 goto err;
3044         }
3045
3046         internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3047                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3048         if (internals->vlan_filter_bmp == NULL) {
3049                 RTE_BOND_LOG(ERR,
3050                              "Failed to init vlan bitmap for bonded device %u",
3051                              eth_dev->data->port_id);
3052                 rte_free(internals->vlan_filter_bmpmem);
3053                 goto err;
3054         }
3055
3056         return eth_dev->data->port_id;
3057
3058 err:
3059         rte_free(internals);
3060         if (eth_dev != NULL)
3061                 eth_dev->data->dev_private = NULL;
3062         rte_eth_dev_release_port(eth_dev);
3063         return -1;
3064 }
3065
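/* Probe callback for the bonding vdev driver: parse the device arguments
 * and create the bonded ethdev.
 */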
3066 static int
3067 bond_probe(struct rte_vdev_device *dev)
3068 {
3069         const char *name;
3070         struct bond_dev_private *internals;
3071         struct rte_kvargs *kvlist;
3072         uint8_t bonding_mode, socket_id;
3073         int  arg_count, port_id;
3074         uint8_t agg_mode;
3075         struct rte_eth_dev *eth_dev;
3076
3077         if (!dev)
3078                 return -EINVAL;
3079
3080         name = rte_vdev_device_name(dev);
3081         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3082
3083         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3084                 eth_dev = rte_eth_dev_attach_secondary(name);
3085                 if (!eth_dev) {
3086                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3087                         return -1;
3088                 }
3089                 /* TODO: request info from primary to set up Rx and Tx */
3090                 eth_dev->dev_ops = &default_dev_ops;
3091                 eth_dev->device = &dev->device;
3092                 rte_eth_dev_probing_finish(eth_dev);
3093                 return 0;
3094         }
3095
3096         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3097                 pmd_bond_init_valid_arguments);
3098         if (kvlist == NULL)
3099                 return -1;
3100
3101         /* Parse link bonding mode */
3102         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3103                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3104                                 &bond_ethdev_parse_slave_mode_kvarg,
3105                                 &bonding_mode) != 0) {
3106                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3107                                         name);
3108                         goto parse_error;
3109                 }
3110         } else {
3111                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for "
3112                                 "bonded device %s", name);
3113                 goto parse_error;
3114         }
3115
3116         /* Parse socket id to create bonding device on */
3117         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3118         if (arg_count == 1) {
3119                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3120                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3121                                 != 0) {
3122                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3123                                         "bonded device %s", name);
3124                         goto parse_error;
3125                 }
3126         } else if (arg_count > 1) {
3127                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3128                                 "bonded device %s", name);
3129                 goto parse_error;
3130         } else {
3131                 socket_id = rte_socket_id();
3132         }
3133
3134         dev->device.numa_node = socket_id;
3135
3136         /* Create link bonding eth device */
3137         port_id = bond_alloc(dev, bonding_mode);
3138         if (port_id < 0) {
3139                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u "
3140                                 "on socket %u.", name, bonding_mode, socket_id);
3141                 goto parse_error;
3142         }
3143         internals = rte_eth_devices[port_id].data->dev_private;
3144         internals->kvlist = kvlist;
3145
3146         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3147                 if (rte_kvargs_process(kvlist,
3148                                 PMD_BOND_AGG_MODE_KVARG,
3149                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3150                                 &agg_mode) != 0) {
3151                         RTE_BOND_LOG(ERR,
3152                                         "Failed to parse agg selection mode for bonded device %s",
3153                                         name);
3154                         goto parse_error;
3155                 }
3156
3157                 if (internals->mode == BONDING_MODE_8023AD)
3158                         internals->mode4.agg_selection = agg_mode;
3159         } else {
3160                 internals->mode4.agg_selection = AGG_STABLE;
3161         }
3162
3163         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3164         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3165                         "socket %u.", name, port_id, bonding_mode, socket_id);
3166         return 0;
3167
3168 parse_error:
3169         rte_kvargs_free(kvlist);
3170
3171         return -1;
3172 }
3173
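/* Remove callback for the bonding vdev driver: release the bonded ethdev
 * and all resources owned by it. A device that still has slaves attached
 * cannot be removed (-EBUSY).
 */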
3174 static int
3175 bond_remove(struct rte_vdev_device *dev)
3176 {
3177         struct rte_eth_dev *eth_dev;
3178         struct bond_dev_private *internals;
3179         const char *name;
3180
3181         if (!dev)
3182                 return -EINVAL;
3183
3184         name = rte_vdev_device_name(dev);
3185         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3186
3187         /* Now free all allocated data - the eth_dev structure and the
3188          * internal (private) data.
3189          */
3190
3191         /* find an ethdev entry */
3192         eth_dev = rte_eth_dev_allocated(name);
3193         if (eth_dev == NULL)
3194                 return -ENODEV;
3195
3196         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3197                 return rte_eth_dev_release_port(eth_dev);
3198
3199         RTE_ASSERT(eth_dev->device == &dev->device);
3200
3201         internals = eth_dev->data->dev_private;
3202         if (internals->slave_count != 0)
3203                 return -EBUSY;
3204
3205         if (eth_dev->data->dev_started == 1) {
3206                 bond_ethdev_stop(eth_dev);
3207                 bond_ethdev_close(eth_dev);
3208         }
3209
3210         eth_dev->dev_ops = NULL;
3211         eth_dev->rx_pkt_burst = NULL;
3212         eth_dev->tx_pkt_burst = NULL;
3213
3214         /* Try to release the mempool used in mode 6. If the bonded
3215          * device is not in mode 6, freeing a NULL pointer is not a
3216          * problem.
3217          */
3218         rte_mempool_free(internals->mode6.mempool);
3219         rte_bitmap_free(internals->vlan_filter_bmp);
3220         rte_free(internals->vlan_filter_bmpmem);
3221
3222         rte_eth_dev_release_port(eth_dev);
3223
3224         return 0;
3225 }
3226
3227 /* This function resolves the slave port ids after all the other pdevs and
3228  * vdevs have been allocated. */
3229 static int
3230 bond_ethdev_configure(struct rte_eth_dev *dev)
3231 {
3232         const char *name = dev->device->name;
3233         struct bond_dev_private *internals = dev->data->dev_private;
3234         struct rte_kvargs *kvlist = internals->kvlist;
3235         int arg_count;
3236         uint16_t port_id = dev - rte_eth_devices;
3237         uint8_t agg_mode;
3238
3239         static const uint8_t default_rss_key[40] = {
3240                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3241                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3242                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3243                 0xBE, 0xAC, 0x01, 0xFA
3244         };
3245
3246         unsigned i, j;
3247
3248         /*
3249          * If RSS is enabled, fill the table with default values and
3250          * set the key to the value specified in the port RSS configuration.
3251          * Fall back to the default RSS key if no key is specified.
3252          */
3253         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3254                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3255                         internals->rss_key_len =
3256                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3257                         memcpy(internals->rss_key,
3258                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3259                                internals->rss_key_len);
3260                 } else {
3261                         internals->rss_key_len = sizeof(default_rss_key);
3262                         memcpy(internals->rss_key, default_rss_key,
3263                                internals->rss_key_len);
3264                 }
3265
3266                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3267                         internals->reta_conf[i].mask = ~0LL;
3268                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3269                                 internals->reta_conf[i].reta[j] =
3270                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3271                                                 dev->data->nb_rx_queues;
3272                 }
3273         }
3274
3275         /* set the max_rx_pktlen */
3276         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3277
3278         /*
3279          * if no kvlist, it means that this bonded device has been created
3280          * through the bonding api.
3281          */
3282         if (!kvlist)
3283                 return 0;
3284
3285         /* Parse MAC address for bonded device */
3286         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3287         if (arg_count == 1) {
3288                 struct rte_ether_addr bond_mac;
3289
3290                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3291                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3292                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3293                                      name);
3294                         return -1;
3295                 }
3296
3297                 /* Set MAC address */
3298                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3299                         RTE_BOND_LOG(ERR,
3300                                      "Failed to set mac address on bonded device %s",
3301                                      name);
3302                         return -1;
3303                 }
3304         } else if (arg_count > 1) {
3305                 RTE_BOND_LOG(ERR,
3306                              "MAC address can be specified only once for bonded device %s",
3307                              name);
3308                 return -1;
3309         }
3310
3311         /* Parse/set balance mode transmit policy */
3312         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3313         if (arg_count == 1) {
3314                 uint8_t xmit_policy;
3315
3316                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3317                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3318                     0) {
3319                         RTE_BOND_LOG(INFO,
3320                                      "Invalid xmit policy specified for bonded device %s",
3321                                      name);
3322                         return -1;
3323                 }
3324
3325                 /* Set balance mode transmit policy */
3326                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3327                         RTE_BOND_LOG(ERR,
3328                                      "Failed to set balance xmit policy on bonded device %s",
3329                                      name);
3330                         return -1;
3331                 }
3332         } else if (arg_count > 1) {
3333                 RTE_BOND_LOG(ERR,
3334                              "Transmit policy can be specified only once for bonded device %s",
3335                              name);
3336                 return -1;
3337         }
3338
3339         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3340                 if (rte_kvargs_process(kvlist,
3341                                        PMD_BOND_AGG_MODE_KVARG,
3342                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3343                                        &agg_mode) != 0) {
3344                         RTE_BOND_LOG(ERR,
3345                                      "Failed to parse agg selection mode for bonded device %s",
3346                                      name);
3347                 }
3348                 if (internals->mode == BONDING_MODE_8023AD) {
3349                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3350                                         agg_mode);
3351                         if (ret < 0) {
3352                                 RTE_BOND_LOG(ERR,
3353                                         "Invalid args for agg selection set for bonded device %s",
3354                                         name);
3355                                 return -1;
3356                         }
3357                 }
3358         }
3359
3360         /* Parse/add slave ports to bonded device */
3361         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3362                 struct bond_ethdev_slave_ports slave_ports;
3363                 unsigned i;
3364
3365                 memset(&slave_ports, 0, sizeof(slave_ports));
3366
3367                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3368                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3369                         RTE_BOND_LOG(ERR,
3370                                      "Failed to parse slave ports for bonded device %s",
3371                                      name);
3372                         return -1;
3373                 }
3374
3375                 for (i = 0; i < slave_ports.slave_count; i++) {
3376                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3377                                 RTE_BOND_LOG(ERR,
3378                                              "Failed to add port %d as slave to bonded device %s",
3379                                              slave_ports.slaves[i], name);
3380                         }
3381                 }
3382
3383         } else {
3384                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3385                 return -1;
3386         }
3387
3388         /* Parse/set primary slave port id */
3389         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3390         if (arg_count == 1) {
3391                 uint16_t primary_slave_port_id;
3392
3393                 if (rte_kvargs_process(kvlist,
3394                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3395                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3396                                        &primary_slave_port_id) < 0) {
3397                         RTE_BOND_LOG(INFO,
3398                                      "Invalid primary slave port id specified for bonded device %s",
3399                                      name);
3400                         return -1;
3401                 }
3402
3403                 /* Set the primary slave port id */
3404                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3405                     != 0) {
3406                         RTE_BOND_LOG(ERR,
3407                                      "Failed to set primary slave port %d on bonded device %s",
3408                                      primary_slave_port_id, name);
3409                         return -1;
3410                 }
3411         } else if (arg_count > 1) {
3412                 RTE_BOND_LOG(INFO,
3413                              "Primary slave can be specified only once for bonded device %s",
3414                              name);
3415                 return -1;
3416         }
3417
3418         /* Parse link status monitor polling interval */
3419         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3420         if (arg_count == 1) {
3421                 uint32_t lsc_poll_interval_ms;
3422
3423                 if (rte_kvargs_process(kvlist,
3424                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3425                                        &bond_ethdev_parse_time_ms_kvarg,
3426                                        &lsc_poll_interval_ms) < 0) {
3427                         RTE_BOND_LOG(INFO,
3428                                      "Invalid lsc polling interval value specified for bonded"
3429                                      " device %s", name);
3430                         return -1;
3431                 }
3432
3433                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3434                     != 0) {
3435                         RTE_BOND_LOG(ERR,
3436                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3437                                      lsc_poll_interval_ms, name);
3438                         return -1;
3439                 }
3440         } else if (arg_count > 1) {
3441                 RTE_BOND_LOG(INFO,
3442                              "LSC polling interval can be specified only once for bonded"
3443                              " device %s", name);
3444                 return -1;
3445         }
3446
3447         /* Parse link up interrupt propagation delay */
3448         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3449         if (arg_count == 1) {
3450                 uint32_t link_up_delay_ms;
3451
3452                 if (rte_kvargs_process(kvlist,
3453                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3454                                        &bond_ethdev_parse_time_ms_kvarg,
3455                                        &link_up_delay_ms) < 0) {
3456                         RTE_BOND_LOG(INFO,
3457                                      "Invalid link up propagation delay value specified for"
3458                                      " bonded device %s", name);
3459                         return -1;
3460                 }
3461
3462                 /* Set link up propagation delay */
3463                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3464                     != 0) {
3465                         RTE_BOND_LOG(ERR,
3466                                      "Failed to set link up propagation delay (%u ms) on bonded"
3467                                      " device %s", link_up_delay_ms, name);
3468                         return -1;
3469                 }
3470         } else if (arg_count > 1) {
3471                 RTE_BOND_LOG(INFO,
3472                              "Link up propagation delay can be specified only once for"
3473                              " bonded device %s", name);
3474                 return -1;
3475         }
3476
3477         /* Parse link down interrupt propagation delay */
3478         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3479         if (arg_count == 1) {
3480                 uint32_t link_down_delay_ms;
3481
3482                 if (rte_kvargs_process(kvlist,
3483                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3484                                        &bond_ethdev_parse_time_ms_kvarg,
3485                                        &link_down_delay_ms) < 0) {
3486                         RTE_BOND_LOG(INFO,
3487                                      "Invalid link down propagation delay value specified for"
3488                                      " bonded device %s", name);
3489                         return -1;
3490                 }
3491
3492                 /* Set link down propagation delay */
3493                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3494                     != 0) {
3495                         RTE_BOND_LOG(ERR,
3496                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3497                                      link_down_delay_ms, name);
3498                         return -1;
3499                 }
3500         } else if (arg_count > 1) {
3501                 RTE_BOND_LOG(INFO,
3502                              "Link down propagation delay can be specified only once for bonded device %s",
3503                              name);
3504                 return -1;
3505         }
3506
3507         return 0;
3508 }
3509
3510 struct rte_vdev_driver pmd_bond_drv = {
3511         .probe = bond_probe,
3512         .remove = bond_remove,
3513 };
3514
3515 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3516 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3517
3518 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3519         "slave=<ifc> "
3520         "primary=<ifc> "
3521         "mode=[0-6] "
3522         "xmit_policy=[l2 | l23 | l34] "
3523         "agg_mode=[count | stable | bandwidth] "
3524         "socket_id=<int> "
3525         "mac=<mac addr> "
3526         "lsc_poll_period_ms=<int> "
3527         "up_delay=<int> "
3528         "down_delay=<int>");
3529
3530 int bond_logtype;
3531
3532 RTE_INIT(bond_init_log)
3533 {
3534         bond_logtype = rte_log_register("pmd.net.bond");
3535         if (bond_logtype >= 0)
3536                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3537 }